Support Google Chrome Bookmark files.

This commit is contained in:
Bastian Kleineidam 2011-02-15 18:26:00 +01:00
parent 25b6dc2e57
commit 0d4377d1ba
7 changed files with 109 additions and 0 deletions

View file

@ -14,6 +14,7 @@ Changes:
Features:
- gui: Preselect filename on save dialog when editing file:// URLs.
Closes: SF bug #3176022
- checking: Added support for Google Chrome bookmark files.
6.3 "Due Date" (released 6.2.2011)

View file

@ -0,0 +1,53 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2011 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
import json
def parse_bookmarks_data (data):
    """Parse a string holding the JSON text of a Chromium bookmark file.

    Generates one (url, name) tuple per bookmark found.
    The bookmarks are produced in no particular order.
    """
    # Decode the JSON text first, then walk the decoded structure.
    decoded = json.loads(data)
    for link, title in parse_bookmarks_json(decoded):
        yield link, title
def parse_bookmarks_file (file):
    """Parse an open file object holding a Chromium bookmark file.

    Generates one (url, name) tuple per bookmark found.
    The bookmarks are produced in no particular order.
    """
    # json.load() reads and decodes the whole file object in one step.
    decoded = json.load(file)
    for link, title in parse_bookmarks_json(decoded):
        yield link, title
def parse_bookmarks_json (data):
    """Walk the decoded JSON of a Chromium Bookmarks file.

    Every value stored under the top-level "roots" key is handed to
    parse_bookmarks_node(), and the bookmarks found there are yielded.
    """
    roots = data["roots"]
    for root_node in roots.values():
        for bookmark in parse_bookmarks_node(root_node):
            yield bookmark
def parse_bookmarks_node (node):
    """Parse one JSON node of Chromium Bookmarks.

    Yields a (url, name) tuple for a node of type "url". For a node of
    type "folder" the children are parsed recursively. Nodes of any
    other type yield nothing.

    Values that are not JSON objects are skipped instead of raising
    TypeError: every value below "roots" is passed to this function, and
    bookmark files may store plain values (for example a
    "sync_transaction_version" string) next to the root folders.
    A missing "type" key, or a folder whose "children" key is missing
    or null, is treated as empty instead of raising KeyError/TypeError.
    """
    if not isinstance(node, dict):
        return
    node_type = node.get("type")
    if node_type == "url":
        yield node["url"], node["name"]
    elif node_type == "folder":
        # "or ()" also covers an explicit null children entry.
        for child in node.get("children") or ():
            for entry in parse_bookmarks_node(child):
                yield entry

View file

@ -86,6 +86,7 @@ class UrlBase (object):
"application/msword": "word",
"text/plain+linkchecker": "text",
"text/plain+opera": "opera",
"text/plain+chromium": "chromium",
}
def __init__ (self, base_url, recursion_level, aggregate,
@ -930,6 +931,15 @@ class UrlBase (object):
self.aggregate, parent_url=self.url, line=lineno, name=name)
self.aggregate.urlqueue.put(url_data)
def parse_chromium (self):
    """Parse a Google Chrome bookmark file.

    Each (url, name) pair found in the content of this URL is turned
    into a URL object one recursion level deeper, with this URL as
    parent, and put on the aggregate's URL queue.
    """
    log.debug(LOG_CHECK, "Parsing Chromium bookmarks %s", self)
    from ..bookmarks.chromium import parse_bookmarks_data
    content = self.get_content()
    for bookmark_url, bookmark_name in parse_bookmarks_data(content):
        queued = get_url_from(
            bookmark_url,
            self.recursion_level+1,
            self.aggregate,
            parent_url=self.url,
            name=bookmark_name)
        self.aggregate.urlqueue.put(queued)
def parse_text (self):
"""
Parse a text file with on url per line; comment and blank

View file

@ -184,6 +184,7 @@ mimedb = mimetypes.MimeTypes(strict=False)
# Maps a MIME type to a compiled pattern describing how content of that
# type begins; every pattern is anchored at the start of the text.
# NOTE(review): presumably used to guess the type of parseable files from
# their first bytes - confirm against the code using this table.
# Inline flags such as (?i) must be the very first thing in a pattern:
# putting them after "^" is deprecated since Python 3.6 and a compile
# error since Python 3.11.
PARSE_CONTENTS = {
    "text/html": re.compile(r'(?i)^<(!DOCTYPE html|html|head|title)'),
    "text/plain+opera": re.compile(r'^Opera Hotlist'),
    "text/plain+chromium": re.compile(r'^{\s*"checksum":'),
    "text/plain+linkchecker": re.compile(r'(?i)^# LinkChecker URL list'),
}

View file

@ -0,0 +1,28 @@
{
"checksum": "b1135bffd7fb303459b60851e3f800eb",
"roots": {
"bookmark_bar": {
"children": [ {
"date_added": "12942261620096544",
"id": "3",
"name": "Testlink",
"type": "url",
"url": "http://example.com/"
} ],
"date_added": "0",
"date_modified": "12942261620096544",
"id": "1",
"name": "Bookmarks Bar",
"type": "folder"
},
"other": {
"children": [ ],
"date_added": "0",
"date_modified": "0",
"id": "2",
"name": "Other Bookmarks",
"type": "folder"
}
},
"version": 1
}

View file

@ -0,0 +1,12 @@
url file://%(curdir)s/%(datadir)s/Bookmarks
cache key file://%(curdir)s/%(datadir)s/Bookmarks
real url file://%(curdir)s/%(datadir)s/Bookmarks
name %(datadir)s/Bookmarks
valid
url http://example.com/
cache key http://example.com/
real url http://www.iana.org/domains/example/
name Testlink
info Redirected to `http://www.iana.org/domains/example/'.
valid

View file

@ -69,6 +69,10 @@ class TestFile (LinkCheckTest):
# Opera bookmark file parsing
self.file_test("opera6.adr")
def test_chromium_bookmarks (self):
    # Check a Google Chrome bookmark file; "Bookmarks" is the name of
    # the test data file handed to file_test().
    bookmark_file = "Bookmarks"
    self.file_test(bookmark_file)
def test_directory_listing (self):
# unpack non-unicode filename which cannot be stored
# in the SF subversion repository