From 3d9958dfbb95335d6daeae75545bea97ec0fdd37 Mon Sep 17 00:00:00 2001 From: Bastian Kleineidam Date: Sat, 17 Dec 2011 16:38:25 +0100 Subject: [PATCH] Parse Safari bookmark files. --- doc/changelog.txt | 1 + linkcheck/bookmarks/safari.py | 109 ++++++++++++++++++ linkcheck/checker/urlbase.py | 10 ++ linkcheck/fileutil.py | 6 +- linkcheck/gui/lineedit.py | 9 ++ tests/__init__.py | 21 ++++ tests/checker/__init__.py | 11 +- .../checker/data/plist_binary/Bookmarks.plist | Bin 0 -> 721 bytes .../data/plist_binary/Bookmarks.plist.result | 19 +++ tests/checker/data/plist_xml/Bookmarks.plist | 73 ++++++++++++ .../data/plist_xml/Bookmarks.plist.result | 21 ++++ tests/checker/test_bookmarks.py | 54 +++++++++ tests/checker/test_file.py | 12 -- tests/test_fileutil.py | 14 ++- 14 files changed, 336 insertions(+), 24 deletions(-) create mode 100644 linkcheck/bookmarks/safari.py create mode 100644 tests/checker/data/plist_binary/Bookmarks.plist create mode 100644 tests/checker/data/plist_binary/Bookmarks.plist.result create mode 100644 tests/checker/data/plist_xml/Bookmarks.plist create mode 100644 tests/checker/data/plist_xml/Bookmarks.plist.result create mode 100644 tests/checker/test_bookmarks.py diff --git a/doc/changelog.txt b/doc/changelog.txt index e4614928..9a588c76 100644 --- a/doc/changelog.txt +++ b/doc/changelog.txt @@ -19,6 +19,7 @@ Features: the configuration file. - gui: Add configuration for ignore URL patterns. Closes: SF bug #3311262 +- checking: Support parsing of Safari Bookmark files. 7.2 "Driver" (released 20.10.2011) diff --git a/linkcheck/bookmarks/safari.py b/linkcheck/bookmarks/safari.py new file mode 100644 index 00000000..cf655e1e --- /dev/null +++ b/linkcheck/bookmarks/safari.py @@ -0,0 +1,109 @@ +# -*- coding: iso-8859-1 -*- +# Copyright (C) 2011 Bastian Kleineidam +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +import os +import sys +import plistlib +try: + import biplist + has_biplist = True +except ImportError: + has_biplist = False + + +def get_profile_dir (): + """Return path where all profiles of current user are stored.""" + basedir = unicode(os.environ["HOME"]) + return os.path.join(basedir, u"Library", u"Safari") + + +def find_bookmark_file (): + """Return the bookmark file of the Default profile. + Returns absolute filename if found, or empty string if no bookmark file + could be found. + """ + if sys.platform != 'darwin': + return u"" + dirname = get_profile_dir() + if os.path.isdir(dirname): + fname = os.path.join(dirname, u"Bookmarks.plist") + if os.path.isfile(fname): + return fname + return u"" + + +def parse_bookmark_file (filename): + """Return iterator for bookmarks of the form (url, name). + Bookmarks are not sorted. + """ + return parse_plist(get_plist_data_from_file(filename)) + + +def parse_bookmark_data (data): + """Return iterator for bookmarks of the form (url, name). + Bookmarks are not sorted. + """ + return parse_plist(get_plist_data_from_string(data)) + + +def get_plist_data_from_file (filename): + if has_biplist: + return biplist.readPlist(filename) + # fall back to normal plistlist + try: + return plistlib.readPlist(filename) + except Exception: + # not parseable (eg. not well-formed, or binary) + return {} + + +def get_plist_data_from_string (data): + if has_biplist: + return biplist.readPlistFromString(data) + # fall back to normal plistlist + try: + return plistlib.readPlistFromString(data) + except Exception: + # not parseable (eg. not well-formed, or binary) + return {} + + +# some key strings +KEY_URLSTRING = 'URLString' +KEY_URIDICTIONARY = 'URIDictionary' +KEY_CHILDREN = 'Children' +KEY_WEBBOOKMARKTYPE = 'WebBookmarkType' + +def parse_plist(entry): + """Parse a XML dictionary entry.""" + if is_leaf(entry): + url = entry[KEY_URLSTRING] + title = entry[KEY_URIDICTIONARY].get('title', url) + yield (url, title) + elif has_children(entry): + for child in entry[KEY_CHILDREN]: + for item in parse_plist(child): + yield item + + +def is_leaf (entry): + """Return true if plist entry is an URL entry.""" + return entry.get(KEY_WEBBOOKMARKTYPE) == 'WebBookmarkTypeLeaf' + + +def has_children (entry): + return entry.get(KEY_WEBBOOKMARKTYPE) == 'WebBookmarkTypeList' diff --git a/linkcheck/checker/urlbase.py b/linkcheck/checker/urlbase.py index 7f4b7be7..c609d28f 100644 --- a/linkcheck/checker/urlbase.py +++ b/linkcheck/checker/urlbase.py @@ -99,6 +99,7 @@ class UrlBase (object): "text/plain+linkchecker": "text", "text/plain+opera": "opera", "text/plain+chromium": "chromium", + "application/x-plist+safari": "safari", } # Set maximum file size for downloaded files in bytes. @@ -981,6 +982,15 @@ class UrlBase (object): self.aggregate, parent_url=self.url, name=name) self.aggregate.urlqueue.put(url_data) + def parse_safari (self): + """Parse a Safari bookmark file.""" + log.debug(LOG_CHECK, "Parsing Safari bookmarks %s", self) + from ..bookmarks.safari import parse_bookmark_data + for url, name in parse_bookmark_data(self.get_content()): + url_data = get_url_from(url, self.recursion_level+1, + self.aggregate, parent_url=self.url, name=name) + self.aggregate.urlqueue.put(url_data) + def parse_text (self): """Parse a text file with one url per line; comment and blank lines are ignored.""" diff --git a/linkcheck/fileutil.py b/linkcheck/fileutil.py index 4649baba..c4588cdf 100644 --- a/linkcheck/fileutil.py +++ b/linkcheck/fileutil.py @@ -200,8 +200,12 @@ def guess_mimetype (filename, read=None): """Return MIME type of file, or 'application/octet-stream' if it could not be determined.""" mime, encoding = mimedb.guess_type(filename, strict=False) + basename = os.path.basename(filename) + # Special case for Safari Bookmark files + if not mime and basename == 'Bookmarks.plist': + return 'application/x-plist+safari' # Special case for Google Chrome Bookmark files. - if not mime and os.path.basename(filename) == 'Bookmarks': + if not mime and basename == 'Bookmarks': mime = 'text/plain' # Mime type text/plain can be differentiated further with content reading. if mime == "text/plain" and read is not None: diff --git a/linkcheck/gui/lineedit.py b/linkcheck/gui/lineedit.py index 6f666844..69727f0f 100644 --- a/linkcheck/gui/lineedit.py +++ b/linkcheck/gui/lineedit.py @@ -185,6 +185,9 @@ class LineEdit (QtGui.QLineEdit): if find_opera(): action = menu.addAction(name % {"browser": u"Opera"}) action.triggered.connect(lambda: self.setText(find_opera())) + if find_safari(): + action = menu.addAction(name % {"browser": u"Safari"}) + action.triggered.connect(lambda: self.setText(find_safari())) def contextMenuEvent (self, event): """Handle context menu event.""" @@ -215,3 +218,9 @@ def find_opera (): """Return Opera bookmark filename or empty string if not found.""" from ..bookmarks.opera import find_bookmark_file return find_bookmark_file() + + +def find_safari (): + """Return Safari bookmark filename or empty string if not found.""" + from ..bookmarks.safari import find_bookmark_file + return find_bookmark_file() diff --git a/tests/__init__.py b/tests/__init__.py index c88b5d45..79874c30 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -156,6 +156,18 @@ def has_pyftpdlib (): need_pyftpdlib = _need_func(has_pyftpdlib, "pyftpdlib") +@memoized +def has_biplib (): + """Test if biplib is available.""" + try: + import biplib + return True + except ImportError: + return False + +need_biplib = _need_func(has_biplib, "biplib") + + @memoized def has_newsserver (server): import nntplib @@ -233,5 +245,14 @@ def limit_time (seconds, skip=False): return run_limited +def get_file (filename=None): + """ + Get file name located within 'data' directory. + """ + directory = os.path.join("tests", "checker", "data") + if filename: + return unicode(os.path.join(directory, filename)) + return unicode(directory) + if __name__ == '__main__': print has_clamav(), has_network(), has_msgfmt(), has_posix(), has_proxy() diff --git a/tests/checker/__init__.py b/tests/checker/__init__.py index fde50b00..a5c2445c 100644 --- a/tests/checker/__init__.py +++ b/tests/checker/__init__.py @@ -27,6 +27,7 @@ import linkcheck.configuration import linkcheck.director import linkcheck.logger import linkcheck.i18n +from .. import get_file # helper alias get_url_from = linkcheck.checker.get_url_from @@ -98,16 +99,6 @@ class TestLogger (linkcheck.logger.Logger): self.diff.append(line) -def get_file (filename=None): - """ - Get file name located within 'data' directory. - """ - directory = os.path.join("tests", "checker", "data") - if filename: - return unicode(os.path.join(directory, filename)) - return unicode(directory) - - def get_file_url (filename): return re.sub("^([a-zA-Z]):", r"/\1|", filename.replace("\\", "/")) diff --git a/tests/checker/data/plist_binary/Bookmarks.plist b/tests/checker/data/plist_binary/Bookmarks.plist new file mode 100644 index 0000000000000000000000000000000000000000..eb810b6a93421bbf428e24882f5b3be08ae70aad GIT binary patch literal 721 zcmZuu%T5$Q6s>9jQP62*V8pjFx+|t1RozwYx*i=TK{UvSB$Cbm1=BJ!J?UvP;8(aX zCNAYCOkBBiI|(Z{{))vb858d2BsVALo^$S2ubuV_$2o@9m9g=OtCNXcTGv}V%l13X ze1Gk*r~ltH8g-f2^CO%%ZR@u>@26S!ooJ=)om_Vldv=6zqOxqxBzBd$RLUr%C}JT& zSUC}5p$rjo2w^UfI3mi1N7qWn*P%5vedFeIV$Y38)pvBaNOx16H`dakt=C@|-7G&$ z?D7cfmp1AL0rTV$v99lj)bHhd4JJQpEvWP*?Y%jXwA(p+_}5iSgp%+yD(ie^TUnC>hhZ+ zPrG}=%I!vR;&-FoY3^j%?#KQm73t=#F%aR9IdK>w?)nO0t_Y%n2}DCl1r0dm4oU1v zt0;QUYqh~(u%r)~onBinZD*a@S(N(^9vUUr!{7j`=iqYj~5oJ!^#u$aO1L@6)l}#c&!TTDK5>=JYQ0T1!X9TxKVYO3aIiU;aRI_Q8e;4AnBj=)dw3;YIu;RGzf016nxS8x;V!4I$pKf%xN2OPpb c7PKa;No&R`ThFYQ;B+i_hT!Qro`bdb4+((4od5s; literal 0 HcmV?d00001 diff --git a/tests/checker/data/plist_binary/Bookmarks.plist.result b/tests/checker/data/plist_binary/Bookmarks.plist.result new file mode 100644 index 00000000..a3a04f59 --- /dev/null +++ b/tests/checker/data/plist_binary/Bookmarks.plist.result @@ -0,0 +1,19 @@ +url file://%(curdir)s/%(datadir)s/plist_binary/Bookmarks.plist +cache key file://%(curdir)s/%(datadir)s/plist_binary/Bookmarks.plist +real url file://%(curdir)s/%(datadir)s/plist_binary/Bookmarks.plist +name %(datadir)s/plist_binary/Bookmarks.plist +valid + +url http://www.example.com/ +cache key http://www.example.com/ +real url http://www.iana.org/domains/example/ +name Imadoofus +info Redirected to `http://www.iana.org/domains/example/'. +valid + +url http://www.example.net/ (cached) +cache key http://www.example.net/ +real url http://www.iana.org/domains/example/ +name Imanotherdoofus +info Redirected to `http://www.iana.org/domains/example/'. +valid diff --git a/tests/checker/data/plist_xml/Bookmarks.plist b/tests/checker/data/plist_xml/Bookmarks.plist new file mode 100644 index 00000000..a707f823 --- /dev/null +++ b/tests/checker/data/plist_xml/Bookmarks.plist @@ -0,0 +1,73 @@ + + + + + Children + + + Title + History + WebBookmarkIdentifier + History + WebBookmarkType + WebBookmarkTypeProxy + WebBookmarkUUID + 68DBD24E-CF6B-4D88-9553-ECEC327A619E + + + Children + + + URIDictionary + + title + Imadoofus + + URLString + http://www.example.com/ + WebBookmarkType + WebBookmarkTypeLeaf + WebBookmarkUUID + A4790F77-B13E-4BE5-8C9C-87D8C86B8B05 + + + Children + + + URIDictionary + + title + Imanotherdoofus + + URLString + http://www.example.net/ + WebBookmarkType + WebBookmarkTypeLeaf + WebBookmarkUUID + C42EBD75-23D8-4C89-AAB3-409E68E3A519 + + + Title + News + WebBookmarkType + WebBookmarkTypeList + WebBookmarkUUID + 124497B1-3953-4AF4-9F80-925D33BA02F5 + + + Title + BookmarksBar + WebBookmarkType + WebBookmarkTypeList + WebBookmarkUUID + E4DBB92F-4E11-48C5-BCFD-DF6EDFACD825 + + + WebBookmarkFileVersion + 1 + WebBookmarkType + WebBookmarkTypeList + WebBookmarkUUID + 818DDA78-A975-4E0A-97D7-9055915D4A5E + + diff --git a/tests/checker/data/plist_xml/Bookmarks.plist.result b/tests/checker/data/plist_xml/Bookmarks.plist.result new file mode 100644 index 00000000..be64bde8 --- /dev/null +++ b/tests/checker/data/plist_xml/Bookmarks.plist.result @@ -0,0 +1,21 @@ +# To convert from XML to binary format: +# plutil -convert binary1 -o - Bookmarks.plist +url file://%(curdir)s/%(datadir)s/plist_xml/Bookmarks.plist +cache key file://%(curdir)s/%(datadir)s/plist_xml/Bookmarks.plist +real url file://%(curdir)s/%(datadir)s/plist_xml/Bookmarks.plist +name %(datadir)s/plist_xml/Bookmarks.plist +valid + +url http://www.example.com/ +cache key http://www.example.com/ +real url http://www.iana.org/domains/example/ +name Imadoofus +info Redirected to `http://www.iana.org/domains/example/'. +valid + +url http://www.example.net/ (cached) +cache key http://www.example.net/ +real url http://www.iana.org/domains/example/ +name Imanotherdoofus +info Redirected to `http://www.iana.org/domains/example/'. +valid diff --git a/tests/checker/test_bookmarks.py b/tests/checker/test_bookmarks.py new file mode 100644 index 00000000..0acf9060 --- /dev/null +++ b/tests/checker/test_bookmarks.py @@ -0,0 +1,54 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2004-2011 Bastian Kleineidam +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +""" +Test file parsing. +""" +from . import LinkCheckTest +from .. import need_network +#, need_biplist +import os + + +class TestBookmarks (LinkCheckTest): + """ + Test bookmark link checking and content parsing. + """ + + @need_network + def _test_firefox_bookmarks (self): + # firefox 3 bookmark file parsing + self.file_test("places.sqlite") + + @need_network + def _test_opera_bookmarks (self): + # Opera bookmark file parsing + self.file_test("opera6.adr") + + @need_network + def _test_chromium_bookmarks (self): + # Chromium and Google Chrome bookmark file parsing + self.file_test("Bookmarks") + + @need_network + def test_safari_bookmarks_xml (self): + # Safari bookmark file parsing (for plaintext plist files) + self.file_test(os.path.join("plist_xml", "Bookmarks.plist")) + + @need_network + def test_safari_bookmarks_binary (self): + # Safari bookmark file parsing (for binary plist files) + self.file_test(os.path.join("plist_binary", "Bookmarks.plist")) diff --git a/tests/checker/test_file.py b/tests/checker/test_file.py index 5e8fe408..bfaf3630 100644 --- a/tests/checker/test_file.py +++ b/tests/checker/test_file.py @@ -61,18 +61,6 @@ class TestFile (LinkCheckTest): def test_urllist (self): self.file_test("urllist.txt") - def test_firefox_bookmarks (self): - # firefox 3 bookmark file parsing - self.file_test("places.sqlite") - - def test_opera_bookmarks (self): - # Opera bookmark file parsing - self.file_test("opera6.adr") - - def test_chromium_bookmarks (self): - # Google Chrome bookmark file parsing - self.file_test("Bookmarks") - def test_directory_listing (self): # unpack non-unicode filename which cannot be stored # in the SF subversion repository diff --git a/tests/test_fileutil.py b/tests/test_fileutil.py index 7e27665d..627c23aa 100644 --- a/tests/test_fileutil.py +++ b/tests/test_fileutil.py @@ -19,11 +19,14 @@ Test file utility functions. """ import unittest +import os +from . import get_file import linkcheck.fileutil file_existing = __file__ file_non_existing = "XXX.i_dont_exist" + class TestFileutil (unittest.TestCase): """Test file utility functions.""" @@ -31,8 +34,17 @@ class TestFileutil (unittest.TestCase): self.assertTrue(linkcheck.fileutil.get_size(file_existing) > 0) self.assertEqual(linkcheck.fileutil.get_size(file_non_existing), -1) - def test_mtime (self): filename = __file__ self.assertTrue(linkcheck.fileutil.get_mtime(file_existing) > 0) self.assertEqual(linkcheck.fileutil.get_mtime(file_non_existing), 0) + + def mime_test (self, filename, mime_expected): + mime = linkcheck.fileutil.guess_mimetype(get_file(filename)) + self.assertEqual(mime, mime_expected) + + def test_mime (self): + filename = os.path.join("plist_binary", "Bookmarks.plist") + self.mime_test(filename, "application/x-plist+safari") + filename = os.path.join("plist_xml", "Bookmarks.plist") + self.mime_test(filename, "application/x-plist+safari")