aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSkip Montanaro <[email protected]>2021-02-16 20:17:05 -0600
committerSkip Montanaro <[email protected]>2021-02-16 20:17:05 -0600
commitc0f170462209484074470d693af44c65e5a252ac (patch)
tree9ad1cd3b7768069e4518befda5d496b33d3a95f3
parent827843405f67b88e62380846eb96969b389117d5 (diff)
downloadpython-0.9.1-patched-QoL-c0f170462209484074470d693af44c65e5a252ac.tar.xz
python-0.9.1-patched-QoL-c0f170462209484074470d693af44c65e5a252ac.zip
simple script to extract (most of) the original shell archive from the Google Groups HTML
-rw-r--r--shar/sharify.py55
1 files changed, 55 insertions, 0 deletions
diff --git a/shar/sharify.py b/shar/sharify.py
new file mode 100644
index 0000000..42ec1c4
--- /dev/null
+++ b/shar/sharify.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+
+"""A better reconstitution of the original shell archives from the Google HTML files.
+
+Example (for part 09):
+
+curl https://groups.google.com/g/alt.sources/c/w0LgGPVB6f0/m/SDnD377as9IJ \
+| egrep '<section>' \
+| sed -e 's/.*<section>//' -e 's:</section>.*::' \
+| python shar/sharify.py > shar/python-0.9.1-09-21.shar
+"""
+
+import html
+import re
+import sys
+
+shar = sys.stdin.read()  # slurp all of stdin; each pass below rebinds shar
+
+for (pat, repl) in (  # pass 1: literal (non-regex) substitutions via str.replace
+ (r'X\t', r'X '),  # raw string: matches backslash + "t" as text, not a TAB char
+ (r'X \t', r'X '),  # NOTE(review): these three rows read identically here; runs of
+ (r'X \t', r'X '),  # spaces were probably collapsed when this page was rendered --
+ (r'X \t', r'X '),  # confirm the exact patterns/replacements against the real file
+ ):
+ shar = shar.replace(pat, repl)  # replaces every occurrence, not just the first
+
+for (pat, repl) in (  # pass 2: literal substitutions, applied in the order listed
+ (',[2,[[1,[null,"', ''),  # delete this wrapper text wherever it occurs
+ (r'\u0026lt;\u003c', '<'),  # the two-escape sequences go first, before the single
+ (r'\u003e\u0026gt;', '>'),  # \u003c / \u003e / \u0026 rows below can split them
+ (r'\u003c', '<'),  # raw strings: these match the six-character escape as text
+ (r'\u003d', '='),
+ (r'\u003e', '>'),
+ (r'\u0026', '&'),  # leaves entities such as &lt; for html.unescape below
+ (r'\"', '"'),  # backslash-escaped double quote -> plain double quote
+ (r'"]', ''),  # removes every '"]' pair, not only one at the end of the input
+):
+ shar = shar.replace(pat, repl)
+
+shar = html.unescape(shar)  # decode remaining HTML character references
+
+for (pat, repl) in (  # pass 3: HTML line breaks become newlines (runs after unescaping)
+ ('<br>', '\n'),
+ ):
+ shar = shar.replace(pat, repl)
+
+for (pat, repl) in (  # pass 4: regex (re.sub) -- unwrap anchors, keep only link text
+ (r'<a href="[^"]+" target="_blank" rel="nofollow"'
+ r' data-saferedirecturl="[^"]+">([^<]+)</a>',
+ r'\1'),  # \1 is the anchor's text content captured by ([^<]+)
+ (r'<a href data-email-masked rel="nofollow">([^<]+)</a>', r'\1'),  # same, masked e-mail
+ ):
+ shar = re.sub(pat, repl, shar)
+
+sys.stdout.write(shar.rstrip())  # trailing whitespace, incl. the final newline, is dropped