2014-01-08 21:33:04 +00:00
# Copyright (C) 2001-2014 Bastian Kleineidam
2006-05-17 19:08:40 +00:00
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
2009-07-24 21:58:20 +00:00
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
2006-05-17 19:08:40 +00:00
"""
Handle uncheckable URLs.
"""
import re
2008-05-09 06:16:03 +00:00
from . import urlbase
2006-05-17 19:08:40 +00:00
2014-03-12 18:20:49 +00:00
2020-05-16 19:19:42 +00:00
class UnknownUrl(urlbase.UrlBase):
    """Handle unknown or just plain broken URLs."""

    def build_url(self):
        """Build the URL, then record it as ignored or as invalid.

        After the base class has built the URL, a scheme accepted by
        is_ignored() gets an informational note and the result "ignored";
        every other scheme is recorded as an invalid result.
        """
        super().build_url()
        if self.is_ignored():
            # NOTE(review): `_` is not defined in this module; presumably the
            # gettext function installed globally -- confirm.
            # The mapping key must match the %(scheme)s placeholder exactly,
            # otherwise the %-formatting raises KeyError.
            self.add_info(
                _("%(scheme)s URL ignored.") % {"scheme": self.scheme.capitalize()}
            )
            self.set_result(_("ignored"))
        else:
            self.set_result(
                _("URL is unrecognized or has invalid syntax"), valid=False
            )

    def is_ignored(self):
        """Return True if this URL scheme is ignored.

        @return: whether self.scheme matches the ignored-schemes pattern
        @rtype: bool
        """
        # is_unknown_scheme is a bound regex match method returning a match
        # object or None; normalise to the documented bool.
        return bool(is_unknown_scheme(self.scheme))

    def can_get_content(self):
        """Unknown URLs have no content.

        @return: False
        @rtype: bool
        """
        return False
2014-07-13 19:51:53 +00:00
# do not edit anything below since these entries are generated from
# scripts/update_iana_uri_schemes.sh
2014-03-12 18:20:49 +00:00
# DO NOT REMOVE
# from https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
2012-09-22 14:18:37 +00:00
# Alternation fragment for the ignored-schemes regex (compiled with
# re.VERBOSE at the end of this module): whitespace is insignificant and the
# text after "#" on each line is a regex comment, not part of the pattern.
# Every entry starts with "|" so the fragments can simply be concatenated.
# Presumably the IANA registrations with "permanent" status -- the entries
# are generated, do not edit them by hand.
ignored_schemes_permanent = r"""
|aaa  # Diameter Protocol
|aaas  # Diameter Protocol with Secure Transport
|about  # about
|acap  # application configuration access protocol
|acct  # acct
|cap  # Calendar Access Protocol
|cid  # content identifier
|coap  # coap
|coap\+tcp  # coap+tcp [1]
|coap\+ws  # coap+ws [1]
|coaps  # coaps
|coaps\+tcp  # coaps+tcp [1]
|coaps\+ws  # coaps+ws [1]
|crid  # TV-Anytime Content Reference Identifier
|data  # data
|dav  # dav
|dict  # dictionary service protocol
|dns  # Domain Name System
|dtn  # DTNRG research and development
|example  # example
|geo  # Geographic Locations
|go  # go
|gopher  # The Gopher Protocol
|h323  # H.323
|iax  # Inter-Asterisk eXchange Version 2
|icap  # Internet Content Adaptation Protocol
|im  # Instant Messaging
|imap  # internet message access protocol
|info  # Information Assets with Identifiers in Public Namespaces. [RFC4452] (section 3) defines an "info" registry of public namespaces, which is maintained by NISO and can be accessed from [http://info-uri.info/].
|ipn  # ipn
|ipp  # Internet Printing Protocol
|ipps  # Internet Printing Protocol over HTTPS
|iris  # Internet Registry Information Service
|iris\.beep  # iris.beep
|iris\.lwz  # iris.lwz
|iris\.xpc  # iris.xpc
|iris\.xpcs  # iris.xpcs
|jabber  # jabber
|ldap  # Lightweight Directory Access Protocol
|leaptofrogans  # leaptofrogans
|mid  # message identifier
|msrp  # Message Session Relay Protocol
|msrps  # Message Session Relay Protocol Secure
|mtqp  # Message Tracking Query Protocol
|mupdate  # Mailbox Update (MUPDATE) Protocol
|nfs  # network file system protocol
|ni  # ni
|nih  # nih
|opaquelocktoken  # opaquelocktokent
|pkcs11  # PKCS#11
|pop  # Post Office Protocol v3
|pres  # Presence
|reload  # reload
|rtsp  # Real-Time Streaming Protocol (RTSP)
|rtsps  # Real-Time Streaming Protocol (RTSP) over TLS
|rtspu  # Real-Time Streaming Protocol (RTSP) over unreliable datagram transport
|service  # service location
|session  # session
|shttp  # Secure Hypertext Transfer Protocol
|sieve  # ManageSieve Protocol
|sip  # session initiation protocol
|sips  # secure session initiation protocol
|sms  # Short Message Service
|snmp  # Simple Network Management Protocol
|soap\.beep  # soap.beep
|soap\.beeps  # soap.beeps
|stun  # stun
|stuns  # stuns
|tag  # tag
|tel  # telephone
|telnet  # Reference to interactive sessions
|tftp  # Trivial File Transfer Protocol
|thismessage  # multipart/related relative reference resolution
|tip  # Transaction Internet Protocol
|tn3270  # Interactive 3270 emulation sessions
|turn  # turn
|turns  # turns
|tv  # TV Broadcasts
|urn  # Uniform Resource Names
|vemmi  # versatile multimedia interface
|vnc  # Remote Framebuffer Protocol
|ws  # WebSocket connections
|wss  # Encrypted WebSocket connections
|xcon  # xcon
|xcon\-userid  # xcon-userid
|xmlrpc\.beep  # xmlrpc.beep
|xmlrpc\.beeps  # xmlrpc.beeps
|xmpp  # Extensible Messaging and Presence Protocol
|z39\.50r  # Z39.50 Retrieval
|z39\.50s  # Z39.50 Session
"""
# Alternation fragment for the ignored-schemes regex (compiled with
# re.VERBOSE at the end of this module): whitespace is insignificant and the
# text after "#" on each line is a regex comment, not part of the pattern.
# Every entry starts with "|" so the fragments can simply be concatenated.
# Presumably the IANA registrations with "provisional" status -- the entries
# are generated, do not edit them by hand.
ignored_schemes_provisional = r"""
|acd  # acd
|acr  # acr
|adiumxtra  # adiumxtra
|adt  # adt
|afp  # afp
|afs  # Andrew File System global file names
|aim  # aim
|amss  # amss
|android  # android
|appdata  # appdata
|apt  # apt
|ar  # ar
|ark  # ark
|attachment  # attachment
|aw  # aw
|barion  # barion
|beshare  # beshare
|bitcoin  # bitcoin
|bitcoincash  # bitcoincash
|blob  # blob
|bolo  # bolo
|browserext  # browserext
|cabal  # cabal
|calculator  # calculator
|callto  # callto
|cast  # cast
|casts  # casts
|chrome  # chrome
|chrome\-extension  # chrome-extension
|com\-eventbrite\-attendee  # com-eventbrite-attendee
|content  # content
|content\-type  # content-type
|cstr  # cstr
|cvs  # cvs
|dab  # dab
|dat  # dat
|diaspora  # diaspora
|did  # did
|dis  # dis
|dlna\-playcontainer  # dlna-playcontainer
|dlna\-playsingle  # dlna-playsingle
|dntp  # dntp
|doi  # doi
|dpp  # dpp
|drm  # drm
|drop  # drop
|dtmi  # dtmi
|dvb  # dvb
|dvx  # dvx
|dweb  # dweb
|ed2k  # ed2k
|elsi  # elsi
|embedded  # embedded
|ens  # ens
|ethereum  # ethereum
|facetime  # facetime
|feed  # feed
|feedready  # feedready
|fido  # fido
|finger  # finger
|first\-run\-pen\-experience  # first-run-pen-experience
|fish  # fish
|fm  # fm
|fuchsia\-pkg  # fuchsia-pkg
|gg  # gg
|git  # git
|gitoid  # gitoid
|gizmoproject  # gizmoproject
|graph  # graph
|gtalk  # gtalk
|ham  # ham
|hcap  # hcap
|hcp  # hcp
|hxxp  # hxxp
|hxxps  # hxxps
|hydrazone  # hydrazone
|hyper  # hyper
|icon  # icon
|iotdisco  # iotdisco
|ipfs  # ipfs
|ipns  # ipns
|irc  # irc
|irc6  # irc6
|ircs  # ircs
|isostore  # isostore
|itms  # itms
|jar  # jar
|jms  # Java Message Service
|keyparc  # keyparc
|lastfm  # lastfm
|lbry  # lbry
|ldaps  # ldaps
|lorawan  # lorawan
|lvlt  # lvlt
|magnet  # magnet
|maps  # maps
|market  # market
|matrix  # matrix
|message  # message
|microsoft\.windows\.camera  # microsoft.windows.camera
|microsoft\.windows\.camera\.multipicker  # microsoft.windows.camera.multipicker
|microsoft\.windows\.camera\.picker  # microsoft.windows.camera.picker
|mms  # mms
|mongodb  # mongodb
|moz  # moz
|ms\-access  # ms-access
|ms\-appinstaller  # ms-appinstaller
|ms\-browser\-extension  # ms-browser-extension
|ms\-calculator  # ms-calculator
|ms\-drive\-to  # ms-drive-to
|ms\-enrollment  # ms-enrollment
|ms\-excel  # ms-excel
|ms\-eyecontrolspeech  # ms-eyecontrolspeech
|ms\-gamebarservices  # ms-gamebarservices
|ms\-gamingoverlay  # ms-gamingoverlay
|ms\-getoffice  # ms-getoffice
|ms\-help  # ms-help
|ms\-infopath  # ms-infopath
|ms\-inputapp  # ms-inputapp
|ms\-lockscreencomponent\-config  # ms-lockscreencomponent-config
|ms\-media\-stream\-id  # ms-media-stream-id
|ms\-meetnow  # ms-meetnow
|ms\-mixedrealitycapture  # ms-mixedrealitycapture
|ms\-mobileplans  # ms-mobileplans
|ms\-newsandinterests  # ms-newsandinterests
|ms\-officeapp  # ms-officeapp
|ms\-people  # ms-people
|ms\-powerpoint  # ms-powerpoint
|ms\-project  # ms-project
|ms\-publisher  # ms-publisher
|ms\-restoretabcompanion  # ms-restoretabcompanion
|ms\-screenclip  # ms-screenclip
|ms\-screensketch  # ms-screensketch
|ms\-search  # ms-search
|ms\-search\-repair  # ms-search-repair
|ms\-secondary\-screen\-controller  # ms-secondary-screen-controller
|ms\-secondary\-screen\-setup  # ms-secondary-screen-setup
|ms\-settings  # ms-settings
|ms\-settings\-airplanemode  # ms-settings-airplanemode
|ms\-settings\-bluetooth  # ms-settings-bluetooth
|ms\-settings\-camera  # ms-settings-camera
|ms\-settings\-cellular  # ms-settings-cellular
|ms\-settings\-cloudstorage  # ms-settings-cloudstorage
|ms\-settings\-connectabledevices  # ms-settings-connectabledevices
|ms\-settings\-displays\-topology  # ms-settings-displays-topology
|ms\-settings\-emailandaccounts  # ms-settings-emailandaccounts
|ms\-settings\-language  # ms-settings-language
|ms\-settings\-location  # ms-settings-location
|ms\-settings\-lock  # ms-settings-lock
|ms\-settings\-nfctransactions  # ms-settings-nfctransactions
|ms\-settings\-notifications  # ms-settings-notifications
|ms\-settings\-power  # ms-settings-power
|ms\-settings\-privacy  # ms-settings-privacy
|ms\-settings\-proximity  # ms-settings-proximity
|ms\-settings\-screenrotation  # ms-settings-screenrotation
|ms\-settings\-wifi  # ms-settings-wifi
|ms\-settings\-workplace  # ms-settings-workplace
|ms\-spd  # ms-spd
|ms\-stickers  # ms-stickers
|ms\-sttoverlay  # ms-sttoverlay
|ms\-transit\-to  # ms-transit-to
|ms\-useractivityset  # ms-useractivityset
|ms\-virtualtouchpad  # ms-virtualtouchpad
|ms\-visio  # ms-visio
|ms\-walk\-to  # ms-walk-to
|ms\-whiteboard  # ms-whiteboard
|ms\-whiteboard\-cmd  # ms-whiteboard-cmd
|ms\-word  # ms-word
|msnim  # msnim
|mss  # mss
|mt  # Matter protocol
|mumble  # mumble
|mvn  # mvn
|notes  # notes
|num  # Namespace Utility Modules
|ocf  # ocf
|oid  # oid
|onenote  # onenote
|onenote\-cmd  # onenote-cmd
|openpgp4fpr  # openpgp4fpr
|otpauth  # otpauth
|palm  # palm
|paparazzi  # paparazzi
|payment  # payment
|payto  # payto
|platform  # platform
|proxy  # proxy
|psyc  # psyc
|pttp  # pttp
|pwid  # pwid
|qb  # qb
|query  # query
|quic\-transport  # quic-transport
|redis  # redis
|rediss  # rediss
|res  # res
|resource  # resource
|rmi  # rmi
|rsync  # rsync
|rtmfp  # rtmfp
|rtmp  # rtmp
|sarif  # sarif
|secondlife  # query
|secret\-token  # secret-token
|sftp  # query
|sgn  # sgn
|shc  # shc
|simpleledger  # simpleledger
|simplex  # simplex
|skype  # skype
|smb  # smb
|smp  # smp
|smtp  # smtp
|soldat  # soldat
|spiffe  # spiffe
|spotify  # spotify
|ssb  # ssb
|ssh  # ssh
|steam  # steam
|submit  # submit
|svn  # svn
|swh  # swh
|swid  # swid
|swidpath  # swidpath
|teamspeak  # teamspeak
|teliaeid  # teliaeid
|things  # things
|tool  # tool
|udp  # udp
|unreal  # unreal
|ut2004  # ut2004
|uuid\-in\-package  # uuid-in-package
|v\-event  # v-event
|ventrilo  # ventrilo
|ves  # ves
|view\-source  # view-source
|vscode  # vscode
|vscode\-insiders  # vscode-insiders
|vsls  # vsls
|w3  # w3
|wcr  # wcr
|web3  # web3
|webcal  # webcal
|wifi  # wifi
|wtai  # wtai
|wyciwyg  # wyciwyg
|xfire  # xfire
|xri  # xri
|ymsgr  # ymsgr
"""
# Alternation fragment for the ignored-schemes regex (compiled with
# re.VERBOSE at the end of this module): whitespace is insignificant and the
# text after "#" on each line is a regex comment, not part of the pattern.
# Every entry starts with "|" so the fragments can simply be concatenated.
# Presumably the IANA registrations with "historical" status -- the entries
# are generated, do not edit them by hand.
ignored_schemes_historical = r"""
|fax  # fax
|filesystem  # filesystem
|mailserver  # Access to data available from mail servers
|modem  # modem
|pack  # pack
|prospero  # Prospero Directory Service
|snews  # NNTP over SSL/TLS
|videotex  # videotex
|wais  # Wide Area Information Servers
|wpid  # wpid
|z39\.50  # Z39.50 information access
"""
# Alternation fragment for the ignored-schemes regex (compiled with
# re.VERBOSE at the end of this module): whitespace is insignificant and the
# text after "#" on each line is a regex comment, not part of the pattern.
# Every entry starts with "|" so the fragments can simply be concatenated.
# Additional schemes that are ignored as well; per the inline notes these are
# vendor- or application-specific.
ignored_schemes_other = r"""
|clsid  # Microsoft specific
|find  # Mozilla specific
|gemini  # Gemini protocol
|isbn  # ISBN (int. book numbers)
|javascript  # JavaScript
|slack  # Slack Technologies client
|tg  # Telegram
|whatsapp  # WhatsApp
"""
2014-02-28 23:12:34 +00:00
# Join the four alternation fragments into one anchored pattern. Each
# fragment starts with a newline followed by "|", so plain concatenation
# yields a single alternation once re.VERBOSE drops the whitespace and the
# "#" comments.
# NOTE: because the first fragment begins with "|", the first alternative of
# the group is empty, so the empty string matches this pattern as well.
ignored_schemes = "^(%s%s%s%s)$" % (
    ignored_schemes_permanent,
    ignored_schemes_provisional,
    ignored_schemes_historical,
    ignored_schemes_other,
)
ignored_schemes_re = re.compile(ignored_schemes, re.VERBOSE)
# Despite the name, this returns a match object for schemes that ARE listed
# above (the ones to ignore) and None for every other scheme string.
is_unknown_scheme = ignored_schemes_re.match