I’m nerding out on ya, sorry. Programmers and Script Monkeys only, as I’m not going to explain how to use this.
I’ve created a Python script that grabs all the latest 40K and Fantasy FAQs/PDFs.
Here’s the script:
#!/usr/bin/python
import urllib
import sys,os
# Site root. GetFile() prepends this to every href scraped from the FAQ pages.
sGWURL = "http://www.games-workshop.com"
# Index pages that are scanned for PDF links.
sWarhammerFAQURL = "http://www.games-workshop.com/gws/content/article.jsp?categoryId=1000018&pIndex=1&aId=3000006&start=2"
# The "&section=" parameter had been mangled into a section sign followed by
# "ion=" (an HTML-entity decoding artifact of "&sect"); restored here so the
# query string is intact and the source stays pure ASCII.
s40KFAQURL = "http://www.games-workshop.com/gws/content/article.jsp?catId=cat440134a&categoryId=1000018&section=&pIndex=1&aId=3400019&start=2"
# Destination folders. WARNING: the script deletes every regular file in these
# folders before downloading, so point them at dedicated directories
# (e.g. the full path to a Dropbox folder), not at a working directory.
sLocalFantasyFAQPath = "./"
sLocal40KFAQPath = "./"
def GetData(sURL):
    """Fetch sURL and return the complete response body.

    The body is returned exactly as read from the connection; no decoding
    or parsing is done here.
    """
    oPage = urllib.urlopen(sURL)
    try:
        return oPage.read()
    finally:
        # Always release the connection, even if read() fails part-way.
        oPage.close()
def FilterOutFAQLines(sData):
    """Return the lines of sData that look like anchors pointing at a PDF.

    A line is kept, unmodified, when it begins with "<a href" and contains
    ".pdf" somewhere after that; both checks ignore case. Lines with any
    leading whitespace are not matched.
    """
    def _IsPDFAnchor(sCandidate):
        sLowered = sCandidate.lower()
        # ".pdf" can never sit at index 0 of a line starting with "<a href",
        # so a plain containment test is enough.
        return sLowered.startswith("<a href") and ".pdf" in sLowered

    return [sCandidate for sCandidate in sData.splitlines() if _IsPDFAnchor(sCandidate)]
def downloadFile(url, localfilename):
    """Download url and save the response body to localfilename.

    The whole body is read into memory first, so the local file is only
    created (or overwritten) once the download has succeeded.
    """
    webFile = urllib.urlopen(url)
    try:
        data = webFile.read()
    finally:
        webFile.close()
    # Binary mode: the payload is a PDF, and text mode would translate
    # newline bytes on platforms such as Windows and corrupt the file.
    with open(localfilename, 'wb') as localFile:
        localFile.write(data)
def GetFile(sHREFLine, sDir):
    """Download the file referenced by one anchor line into sDir.

    sHREFLine is expected to look like '<a href="/path/file.pdf">Name</a>'.
    The text between the first pair of double quotes is appended to sGWURL
    to form the download URL, and the text between the first '>' and the
    next '<' is used verbatim as the local file name.

    Returns the file name (without the directory part).
    """
    # Search for each closing delimiter from just past its opening one,
    # instead of from a fixed character offset.
    iStart = sHREFLine.find('"') + 1
    sURL = sGWURL + sHREFLine[iStart:sHREFLine.find('"', iStart)]

    iStart = sHREFLine.find('>') + 1
    sFileName = sHREFLine[iStart:sHREFLine.find('<', iStart)]

    # os.path.join gives the same result as plain concatenation when sDir
    # ends in a separator, and also works when it does not.
    downloadFile(sURL, os.path.join(sDir, sFileName))
    return sFileName
def DeleteFolderContents(folder):
    """Delete every regular file directly inside folder.

    Sub-directories and their contents are left untouched. A file that
    cannot be removed is reported on stdout and skipped, so one failure
    does not stop the rest of the clean-up.
    """
    for the_file in os.listdir(folder):
        file_path = os.path.join(folder, the_file)
        if not os.path.isfile(file_path):
            continue
        try:
            os.unlink(file_path)
        except OSError as e:
            # Best effort: report the failure and carry on with the next file.
            print(e)
# Fetch and parse both index pages first, so a network failure aborts the run
# before any existing file has been deleted.
aWarhammerFAQs = FilterOutFAQLines(GetData(sWarhammerFAQURL))
a40KFAQs = FilterOutFAQLines(GetData(s40KFAQURL))

# Clear each destination folder exactly once, before any download starts.
# Both paths default to the same folder; clearing the 40K folder after the
# Fantasy downloads would otherwise delete the Fantasy files just fetched.
# NOTE: this removes every regular file in those folders, not only old FAQs.
aClearedFolders = []
for sFolder in (sLocalFantasyFAQPath, sLocal40KFAQPath):
    sResolved = os.path.abspath(sFolder)
    if sResolved not in aClearedFolders:
        DeleteFolderContents(sFolder)
        aClearedFolders.append(sResolved)

for sLine in aWarhammerFAQs:
    GetFile(sLine, sLocalFantasyFAQPath)
for sLine in a40KFAQs:
    GetFile(sLine, sLocal40KFAQPath)
Feel free to download it here in case your copy/paste doesn’t work.
This won’t be very useful to non-programmer types, but it might save someone an hour or so if they wanted to do the same thing.