From 3bbfac47c767915b8a005f42b7bca32f9da54473 Mon Sep 17 00:00:00 2001 From: calvin Date: Tue, 6 Jul 2004 20:34:00 +0000 Subject: [PATCH] removed git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@1353 e7d03fd6-7b0d-0410-9947-9c21f3af8025 --- linkcheck/parser/.cvsignore | 2 - linkcheck/parser/Makefile | 27 - linkcheck/parser/__init__.py | 118 ---- linkcheck/parser/htmllex.l | 1032 ---------------------------------- linkcheck/parser/htmllib.py | 115 ---- linkcheck/parser/htmlparse.y | 840 --------------------------- linkcheck/parser/htmlsax.h | 83 --- linkcheck/parser/s_util.c | 54 -- linkcheck/parser/s_util.h | 14 - 9 files changed, 2285 deletions(-) delete mode 100644 linkcheck/parser/.cvsignore delete mode 100644 linkcheck/parser/Makefile delete mode 100644 linkcheck/parser/__init__.py delete mode 100644 linkcheck/parser/htmllex.l delete mode 100644 linkcheck/parser/htmllib.py delete mode 100644 linkcheck/parser/htmlparse.y delete mode 100644 linkcheck/parser/htmlsax.h delete mode 100644 linkcheck/parser/s_util.c delete mode 100644 linkcheck/parser/s_util.h diff --git a/linkcheck/parser/.cvsignore b/linkcheck/parser/.cvsignore deleted file mode 100644 index 625b6ecb..00000000 --- a/linkcheck/parser/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -*.so -*.output diff --git a/linkcheck/parser/Makefile b/linkcheck/parser/Makefile deleted file mode 100644 index 46984ad0..00000000 --- a/linkcheck/parser/Makefile +++ /dev/null @@ -1,27 +0,0 @@ -# parser needs flex >= 2.5.xx from http://lex.sf.net/ -# for reentrant bison parser support! -FLEX=flex -PYVER=2.3 -PYTHON=python$(PYVER) - -all: htmllex.c htmlparse.c - -%.o: %.c - gcc -g -O3 -Wall -pedantic -Wstrict-prototypes -fPIC -I. -I/usr/include/$(PYTHON) -c $< -o $@ - -htmlparse.h htmlparse.c: htmlparse.y htmlsax.h - bison htmlparse.y - -htmllex.l: htmlparse.h - -htmllex.c: htmllex.l htmlsax.h - $(FLEX) htmllex.l - -test: testsax - cat test.html | ./testsax - -clean: - rm -f htmlparse.c htmlparse.h htmllex.c *.o *.output - -splint: - splint -initallelements +posixlib -I/usr/include/linux -I. -I/usr/include/$(PYTHON) htmllex.c | less diff --git a/linkcheck/parser/__init__.py b/linkcheck/parser/__init__.py deleted file mode 100644 index dba70cd7..00000000 --- a/linkcheck/parser/__init__.py +++ /dev/null @@ -1,118 +0,0 @@ -# -*- coding: iso-8859-1 -*- -# Copyright (C) 2000-2004 Bastian Kleineidam -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -"""Fast HTML parser module written in C with the following features: - -1. Reentrant - - As soon as any HTML string data is available, we try to feed it - to the HTML parser. This means that the parser has to scan possible - incomplete data, recognizing as much as it can. Incomplete trailing - data is saved for subsequent calls (or it is just flushed away with the - flush() function). - A reset() brings the parser back to its initial state, throwing away all - buffered data. - -2. Coping with HTML syntax errors - - The parser recognizes as much as it can and passes the rest - of the data as TEXT tokens. - The scanner only passes complete recognized HTML syntax elements to - the parser. Invalid syntax elements are passed as TEXT. This way we do - not need the bison error recovery. - Incomplete data is rescanned the next time the parser calls yylex() or - when it is being flush()ed. - - The following syntax errors will be recognized correctly: - - a) missing quotes around attribute values - b) "" end tags in script modus - c) missing ">" in tags - d) invalid tag names - e) invalid characters inside tags or tag attributes - - Additionally the parser has the following features: - - a) NULL bytes are changed into spaces - b) inside a