Every line of the output, except the last one, was missing a double quote character at the end.

Additional changes:
- Specify parser to avoid BeautifulSoup warning
- Use single quotes only

Signed-off-by: JoeLametta <JoeLametta@users.noreply.github.com>
67 lines
1.4 KiB
Python
67 lines
1.4 KiB
Python
# -*- Mode: Python -*-
|
|
# vi:si:et:sw=4:sts=4:ts=4
|
|
|
|
# show all possible offsets, in order of popularity, from a download of
|
|
# http://www.accuraterip.com/driveoffsets.htm
|
|
|
|
import sys
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
# Prefix of the first generated line and of every continuation line.
# The continuation prefix is ten spaces plus the opening quote (11 chars);
# it is built explicitly so that whitespace mangling cannot shorten it.
FIRST_PREFIX = 'OFFSETS = ("'
CONTINUATION_PREFIX = ' ' * 10 + '"'

# A generated line is closed and wrapped once it grows past this length.
WRAP_WIDTH = 60


def iter_offset_cells(soup):
    """Yield ``(offset_text, count_text)`` for each four-column table row.

    ``soup`` is the parsed document. The first two ``<tr>`` elements are
    skipped, and rows that do not have exactly four ``<td>`` cells are
    ignored. Either yielded value is ``None`` when the cell has no text.
    """
    # skip first two spurious elements
    for row in soup.findAll('tr')[2:]:
        columns = row.findAll('td')
        if len(columns) == 4:
            # second cell holds the offset, third cell holds its count
            yield columns[1].find(text=True), columns[2].find(text=True)


def tally_offsets(pairs):
    """Sum the counts per offset and return ``{offset: total count}``.

    ``pairs`` is an iterable of ``(offset_text, count_text)``. Pairs whose
    offset or count is missing (``None``) or not an integer are skipped.
    Offsets are kept as strings, with surrounding whitespace removed so
    that the same offset is never counted under two different keys.
    """
    totals = {}  # offset -> total count
    for offset_text, count_text in pairs:
        # only use numeric offsets (and counts); int(None) raises
        # TypeError, int('abc') raises ValueError
        try:
            int(offset_text)
            count = int(count_text)
        except (TypeError, ValueError):
            continue
        offset = str(offset_text).strip()
        totals[offset] = totals.get(offset, 0) + count
    return totals


def sort_by_popularity(totals):
    """Return the offsets of ``totals`` ordered by descending total count.

    Offsets with the same count are ordered by descending string value.
    """
    ranked = sorted(
        ((count, offset) for offset, count in totals.items()),
        reverse=True,
    )
    return [offset for _count, offset in ranked]


def format_offsets(offsets, width=WRAP_WIDTH):
    """Format ``offsets`` as an ``OFFSETS = ("...")`` source snippet.

    The offsets are joined with ``', '`` inside adjacent string literals.
    A line is closed with a quote and wrapped as soon as it is longer than
    ``width`` characters. The final line always ends with ``")``, including
    when the last offset is the one that triggered a wrap and when
    ``offsets`` is empty.
    """
    lines = []
    line = FIRST_PREFIX
    pending = False  # True while `line` holds offsets not yet in `lines`

    for offset in offsets:
        line += offset + ', '
        pending = True
        if len(line) > width:
            lines.append(line + '"')
            line = CONTINUATION_PREFIX
            pending = False

    if pending:
        # get last line too, trimming the comma and adding the quote
        lines.append(line[:-2] + '")')
    elif lines:
        # the last offset caused a wrap: turn the trailing ', "' of the
        # final emitted line into the closing '")'
        lines[-1] = lines[-1][:-3] + '")'
    else:
        # no offsets at all: emit an empty but valid string literal
        lines.append(FIRST_PREFIX + '")')

    return '\n'.join(lines)


def main(argv=None):
    """Print the offsets found in the HTML file named by ``argv[1]``.

    ``argv`` defaults to ``sys.argv``. Exits with a usage message when no
    file name is given.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        raise SystemExit(
            'usage: %s <saved driveoffsets.htm>'
            % (argv[0] if argv else 'offsets')
        )

    with open(argv[1]) as f:
        doc = f.read()

    soup = BeautifulSoup(doc, features='html.parser')

    totals = tally_offsets(iter_offset_cells(soup))
    # now format it for code inclusion
    print(format_offsets(sort_by_popularity(totals)))


if __name__ == '__main__':
    main()