H. Chase Stevens Logo

Most Common Tags

  1. programming
  2. python
  3. code
  4. philosophy
  5. evolution
  6. game design
  7. probability
  8. video games
  9. genetic algorithms
  10. government

Archives

Loading

'Tis the Season for Shoddily Parsing XML

posted on: Monday, December 12, 2011 (6:14 pm) by

Here's a little something I wrote up to download and tag a bunch of mp3s (of the Christmas song variety) from a jwplayer xml playlist. If you'd like to download from other playlists, simply change xml_url (and perhaps localdir, if you'd like to download your music to somewhere a little more accessible). You'll need to have this ID3 library installed.

Download

import urllib
import urllib2
import os
import ID3
xml_url = "http://pinochan.net/overbooru/xmas/playlist.xml"
localdir = "C:\\Xmas_Music"
webdir = xml_url[::-1][xml_url[::-1].find('/'):][::-1]
try:
    os.chdir(localdir)
    print "Downloading to", localdir
except:
    os.mkdir(localdir)
    print "Creating download directory", localdir
try:
    xml_page = urllib2.urlopen(xml_url)
except:
    print "Error opening playlist URL."
    raw_input()
    quit()
print "Parsing XML"
xml = xml_page.read().replace("\t","").replace("\n","")
tracks = xml.split('')[1:]
track_dict = dict()
def get_elem(xml,tag):
    """Return the text between the first <tag> and its closing </tag> in xml.

    This is a plain substring search, not a real XML parser: attributes on
    the opening tag and nested elements of the same name are not handled.

    xml -- string to search.
    tag -- element name without angle brackets, e.g. "title".

    Returns the enclosed text, or an empty string if either the opening or
    the closing tag cannot be found.
    """
    start_tag = "<" + tag + ">"
    end_tag = "</" + tag + ">"
    start = xml.find(start_tag)
    if start == -1:
        return ""
    start += len(start_tag)
    # Look for the closing tag only after the opening tag, so an earlier
    # stray closing tag cannot produce an inverted (empty) slice.
    end = xml.find(end_tag, start)
    if end == -1:
        return ""
    return xml[start:end]
down_count = 0
for track in tracks:
    track_id = get_elem(track,"location")
    track_dict[track_id] = dict()
    print "%s:" %track_id
    track_dict[track_id]['Artist'] = get_elem(track,"creator").replace("'","'").replace("&","&")
    print "\t Artist: %s" %track_dict[track_id]['Artist']
    track_dict[track_id]['Title'] = get_elem(track,"title").replace("'","'").replace("&","&")
    print "\t Title: %s" %track_dict[track_id]['Title']
    track_dict[track_id]['URL'] = webdir + track_id
    if not track_id in os.listdir(os.getcwd()):
        print "\t File not found in directory, downloading now."
        try:
            urllib.urlretrieve(track_dict[track_id]['URL'],filename=track_id)
            print "\t Download complete, editing metadata."
            down_count += 1
        except:
            print "\t Could not download %s, moving on to next file." % track_dict[track_id]['URL']
            break
        try:
            id3info = ID3.ID3(track_id)
            id3info['TITLE'] = track_dict[track_id]['Title']
            id3info['ARTIST'] = track_dict[track_id]['Artist']
            print "\t Done editing metadata: %s" %id3info
        except:
            print "\t There was an error editing metadata."
    else:
        print "\t File already in directory."
print "Downloaded %s files." %down_count
print "%s files in %s." %(len(os.listdir(os.getcwd())),os.getcwd())
print "Press Enter to exit."
raw_input()

Tags: code, jwplayer, metadata, mp3, music, programming, python, tagging, xmas, xml