mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-05-01 11:34:41 +00:00
Support Google Chrome Bookmark files.
This commit is contained in:
parent
25b6dc2e57
commit
0d4377d1ba
7 changed files with 109 additions and 0 deletions
|
|
@ -14,6 +14,7 @@ Changes:
|
|||
Features:
|
||||
- gui: Preselect filename on save dialog when editing file:// URLs.
|
||||
Closes: SF bug #3176022
|
||||
- checking: Added support for Google Chrome bookmark files.
|
||||
|
||||
|
||||
6.3 "Due Date" (released 6.2.2011)
|
||||
|
|
|
|||
53
linkcheck/bookmarks/chromium.py
Normal file
53
linkcheck/bookmarks/chromium.py
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2011 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
import json
|
||||
|
||||
|
||||
def parse_bookmarks_data (data):
    """Parse a Chromium bookmark JSON string.

    This is a generator: the string is decoded with json.loads() once
    iteration starts, and each bookmark is yielded as a (url, name)
    tuple. Bookmarks are not sorted.
    """
    decoded = json.loads(data)
    for bookmark in parse_bookmarks_json(decoded):
        url, name = bookmark
        yield url, name
|
||||
|
||||
|
||||
def parse_bookmarks_file (file):
    """Parse a Chromium bookmark file object.

    This is a generator: the file object is read with json.load() once
    iteration starts, and each bookmark is yielded as a (url, name)
    tuple. Bookmarks are not sorted.
    """
    decoded = json.load(file)
    for bookmark in parse_bookmarks_json(decoded):
        url, name = bookmark
        yield url, name
|
||||
|
||||
|
||||
def parse_bookmarks_json (data):
    """Parse complete, already decoded JSON data for Chromium Bookmarks.

    Every value stored under the "roots" key is handed to
    parse_bookmarks_node(), and whatever that generator yields is
    passed on unchanged.
    """
    roots = data["roots"]
    for root_node in roots.values():
        for bookmark in parse_bookmarks_node(root_node):
            yield bookmark
|
||||
|
||||
|
||||
def parse_bookmarks_node (node):
    """Parse one JSON node of Chromium Bookmarks.

    Yield a (url, name) tuple for a node of type "url" and recurse into
    the children of a node of type "folder". Any other node yields
    nothing.

    Entries that are not JSON objects, objects without a "type" key and
    folders without a "children" key are skipped instead of raising an
    error. NOTE(review): some Chrome versions reportedly store plain
    metadata values next to the folders under "roots" - confirm against
    real bookmark files.
    """
    if not isinstance(node, dict):
        # e.g. a plain string or number stored beside the real folders
        return
    node_type = node.get("type")
    if node_type == "url":
        yield node["url"], node["name"]
    elif node_type == "folder":
        for child in node.get("children", ()):
            for entry in parse_bookmarks_node(child):
                yield entry
|
||||
|
|
@ -86,6 +86,7 @@ class UrlBase (object):
|
|||
"application/msword": "word",
|
||||
"text/plain+linkchecker": "text",
|
||||
"text/plain+opera": "opera",
|
||||
"text/plain+chromium": "chromium",
|
||||
}
|
||||
|
||||
def __init__ (self, base_url, recursion_level, aggregate,
|
||||
|
|
@ -930,6 +931,15 @@ class UrlBase (object):
|
|||
self.aggregate, parent_url=self.url, line=lineno, name=name)
|
||||
self.aggregate.urlqueue.put(url_data)
|
||||
|
||||
def parse_chromium (self):
    """Parse a Google Chrome bookmark file.

    Each (url, name) pair found in the content is turned into a URL
    object one recursion level below this one, with this URL as its
    parent, and is put on the aggregate's URL queue.
    """
    log.debug(LOG_CHECK, "Parsing Chromium bookmarks %s", self)
    from ..bookmarks.chromium import parse_bookmarks_data
    content = self.get_content()
    for bookmark_url, bookmark_name in parse_bookmarks_data(content):
        child = get_url_from(bookmark_url, self.recursion_level+1,
            self.aggregate, parent_url=self.url, name=bookmark_name)
        self.aggregate.urlqueue.put(child)
|
||||
|
||||
def parse_text (self):
|
||||
"""
|
||||
Parse a text file with one url per line; comment and blank
|
||||
|
|
|
|||
|
|
@ -184,6 +184,7 @@ mimedb = mimetypes.MimeTypes(strict=False)
|
|||
# Maps a MIME type to a compiled pattern anchored at the start of the
# text it is applied to (presumably the beginning of the file content -
# the lookup code is not visible here).
# Inline flags such as (?i) must be the very first thing in a pattern:
# placing them after "^" is deprecated since Python 3.6 and a compile
# error since Python 3.11.
PARSE_CONTENTS = {
    "text/html": re.compile(r'(?i)^<(!DOCTYPE html|html|head|title)'),
    "text/plain+opera": re.compile(r'^Opera Hotlist'),
    "text/plain+chromium": re.compile(r'^{\s*"checksum":'),
    "text/plain+linkchecker": re.compile(r'(?i)^# LinkChecker URL list'),
}
|
||||
|
||||
|
|
|
|||
28
tests/checker/data/Bookmarks
Normal file
28
tests/checker/data/Bookmarks
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
{
|
||||
"checksum": "b1135bffd7fb303459b60851e3f800eb",
|
||||
"roots": {
|
||||
"bookmark_bar": {
|
||||
"children": [ {
|
||||
"date_added": "12942261620096544",
|
||||
"id": "3",
|
||||
"name": "Testlink",
|
||||
"type": "url",
|
||||
"url": "http://example.com/"
|
||||
} ],
|
||||
"date_added": "0",
|
||||
"date_modified": "12942261620096544",
|
||||
"id": "1",
|
||||
"name": "Bookmarks Bar",
|
||||
"type": "folder"
|
||||
},
|
||||
"other": {
|
||||
"children": [ ],
|
||||
"date_added": "0",
|
||||
"date_modified": "0",
|
||||
"id": "2",
|
||||
"name": "Other Bookmarks",
|
||||
"type": "folder"
|
||||
}
|
||||
},
|
||||
"version": 1
|
||||
}
|
||||
12
tests/checker/data/Bookmarks.result
Normal file
12
tests/checker/data/Bookmarks.result
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
url file://%(curdir)s/%(datadir)s/Bookmarks
|
||||
cache key file://%(curdir)s/%(datadir)s/Bookmarks
|
||||
real url file://%(curdir)s/%(datadir)s/Bookmarks
|
||||
name %(datadir)s/Bookmarks
|
||||
valid
|
||||
|
||||
url http://example.com/
|
||||
cache key http://example.com/
|
||||
real url http://www.iana.org/domains/example/
|
||||
name Testlink
|
||||
info Redirected to `http://www.iana.org/domains/example/'.
|
||||
valid
|
||||
|
|
@ -69,6 +69,10 @@ class TestFile (LinkCheckTest):
|
|||
# Opera bookmark file parsing
|
||||
self.file_test("opera6.adr")
|
||||
|
||||
def test_chromium_bookmarks (self):
    """Run the file test on the Google Chrome bookmark data file."""
    bookmark_file = "Bookmarks"
    self.file_test(bookmark_file)
|
||||
|
||||
def test_directory_listing (self):
|
||||
# unpack non-unicode filename which cannot be stored
|
||||
# in the SF subversion repository
|
||||
|
|
|
|||
Loading…
Reference in a new issue