Merge branch 'develop'. lots of changes, see git log for details. Cool features: post process single dirs, select specific downloads, track specific branches.....

This commit is contained in:
rembo10
2014-04-09 17:16:21 -07:00
643 changed files with 86033 additions and 107676 deletions

View File

@@ -65,3 +65,7 @@ getAlbumInfo&id=$albumid (See above, returns Summary and Content)
getArtistThumb&id=$artistid (Returns either a relative path to the cached thumbnail artist image, or an http:// address if the cache dir can't be written to)
getAlbumThumb&id=$albumid (see above)
choose_specific_download&id=$albumid (Gives you a list of results from searcher.searchforalbum(). Basically runs a normal search, but rather than sorting them and downloading the best result, it dumps the data, which you can then pass on to download_specific_release(). Returns a list of dictionaries with params: title, size, url, provider & kind - all of these values must be passed back to download_specific_release)
download_specific_release&id=albumid&title=$title&size=$size&url=$url&provider=$provider&kind=$kind (Allows you to manually pass a choose_specific_download release back to searcher.send_to_downloader())

View File

@@ -14,7 +14,12 @@
# You should have received a copy of the GNU General Public License
# along with Headphones. If not, see <http://www.gnu.org/licenses/>.
import os, sys, locale
import os, sys
# Ensure lib added to path, before any other imports
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'lib/'))
import locale
import time
import signal
@@ -34,7 +39,6 @@ signal.signal(signal.SIGTERM, headphones.sig_handler)
def main():
# Fixed paths to Headphones
if hasattr(sys, 'frozen'):
headphones.FULL_PATH = os.path.abspath(sys.executable)
@@ -142,7 +146,7 @@ def main():
# Force the http port if neccessary
if args.port:
http_port = args.port
logger.info('Using forced port: %i' % http_port)
logger.info('Using forced port: %i', http_port)
else:
http_port = int(headphones.HTTP_PORT)
@@ -172,7 +176,7 @@ def main():
except KeyboardInterrupt:
headphones.SIGNAL = 'shutdown'
else:
logger.info('Received signal: ' + headphones.SIGNAL)
logger.info('Received signal: %s', headphones.SIGNAL)
if headphones.SIGNAL == 'shutdown':
headphones.shutdown()
elif headphones.SIGNAL == 'restart':

8
TODO
View File

@@ -1,8 +0,0 @@
##############################
### TODO LIST (18/08/12) ###
##############################
1. Add ability to fetch a specific extra, rather than pull down the whole category
- This requires extra calls to MB right now, so modify 1st mb artist call to pull down the
whole album list, then only send the desried ones to importer.py
2. Update API with newstyle getExtras function (lacking entirely right now)

View File

@@ -5,8 +5,13 @@
%>
<%def name="headerIncludes()">
<div id="subhead_container">
<div id="back_to_previous_link">
<a href="artistPage?ArtistID=${album['ArtistID']}" class="back">&laquo; Back to ${album['ArtistName']}</a>
</div>
<div id="subhead_menu">
<a id="menu_link_delete" href="deleteAlbum?AlbumID=${album['AlbumID']}&ArtistID=${album['ArtistID']}"><i class="fa fa-trash-o"></i> Delete Album</a>
%if album['Status'] == 'Skipped':
<a id="menu_link_wanted" href="#" onclick="doAjaxCall('queueAlbum?AlbumID=${album['AlbumID']}&ArtistID=${album['ArtistID']}&new=False', $(this),true)" data-success="'${album['AlbumTitle']}' added to queue"><i class="fa fa-heart"></i> Mark Album as Wanted</a>
@@ -59,9 +64,23 @@
<input type="button" value="Save changes" onclick="doAjaxCall('editSearchTerm',$(this),'tabs',true);return false;" data-success="Search term updated"/>
</form>
</div>
<a class="menu_link_edit" id="choose_specific_download" href="#" onclick="getAvailableDownloads()"><i class="fa fa-search"></i> Choose Specific Download</a>
<div id="choose_specific_download_dialog" title="Choose a specific download for this album" style="display:none" class="configtable">
<table class="display" id="downloads_table">
<thead>
<tr>
<th id="title">Title</th>
<th id="size">Size</th>
<th id="provider">Provider</th>
<th id="kind">Kind</th>
</tr>
</thead>
<tbody id="downloads_table_body">
</tbody>
</table>
</div>
</div>
</div>
<a href="artistPage?ArtistID=${album['ArtistID']}" class="back">&laquo; Back to ${album['ArtistName']}</a>
</%def>
<%def name="body()">
@@ -89,7 +108,6 @@
<li>Duration: <span>${albumduration}</span></li>
</ul>
</div>
</div>
<div id="track_wrapper">
<table class="display" id="track_table">
@@ -197,9 +215,66 @@
"bInfo": false,
"bPaginate": false,
"bDestroy": true
});
});
};
function getAvailableDownloads() {
ShowSpinner();
$.getJSON("choose_specific_download?AlbumID=${album['AlbumID']}", function(data) {
loader.remove();
feedback.fadeOut();
search_results = data
for( var i = 0, len = data.length; i < len; i++ ) {
$('#downloads_table_body').append('<tr><td id="title"><a href="#" onclick="downloadSpecificRelease('+i+')">'+data[i].title+'</a></td><td id="size"><span title='+data[i].size+'></span>'+(data[i].size / (1024*1024)).toFixed(2)+' MB</td><td id="provider">'+data[i].provider+'</td><td id="kind">'+data[i].kind+'</td></tr>');
}
$('#downloads_table').dataTable({
"aoColumns": [
null,
{ "sType": "title-numeric"},
null,
null
],
"aaSorting": [],
"bFilter": false,
"bInfo": false,
"bPaginate": false,
});
$("#choose_specific_download_dialog").dialog({ width: "1000px" });
return false;
});
}
function downloadSpecificRelease(i){
title = search_results[i].title
size = search_results[i].size
url = search_results[i].url
provider = search_results[i].provider
kind = search_results[i].kind
ShowSpinner();
$.getJSON("download_specific_release?AlbumID=${album['AlbumID']}&title="+title+"&size="+size+"&url="+url+"&provider="+provider+"&kind=" + kind, function(data) {
loader.remove();
feedback.fadeOut();
refreshSubmenu();
$("#choose_specific_download_dialog").dialog("close");
});
}
function ShowSpinner() {
feedback = $("#ajaxMsg");
update = $("#updatebar");
if ( update.is(":visible") ) {
var height = update.height() + 35;
feedback.css("bottom",height + "px");
} else {
feedback.removeAttr("style");
}
loader = $("<i class='fa fa-refresh fa-spin'></i>");
feedback.prepend(loader);
feedback.fadeIn();
}
$(document).ready(function() {
getAlbumInfo();
initThisPage();

View File

@@ -193,7 +193,7 @@
template = '<li><a target="_blank" href="URI"><span class="sk-name">NAME</span><span class="sk-location">LOC</span></a></li>';
$.getJSON("http://api.songkick.com/api/3.0/artists/mbid:${artist['ArtistID']}/calendar.json?apikey=${headphones.SONGKICK_APIKEY}&jsoncallback=?",
$.getJSON("https://api.songkick.com/api/3.0/artists/mbid:${artist['ArtistID']}/calendar.json?apikey=${headphones.SONGKICK_APIKEY}&jsoncallback=?",
function(data){
if (data['resultsPage'].totalEntries >= 1) {
@@ -265,6 +265,7 @@
"aoColumnDefs": [
{ 'bSortable': false, 'aTargets': [ 0,1 ] }
],
"bStateSave": true,
"oLanguage": {
"sLengthMenu":"Show _MENU_ albums per page",
"sEmptyTable": "No album information available",

View File

@@ -31,15 +31,15 @@
<div id="container">
<div id="ajaxMsg"></div>
% if not headphones.CURRENT_VERSION:
<div id="updatebar">
You're running an unknown version of Headphones. <a href="update">Update</a> or
<a href="#" onclick="$('#updatebar').slideUp('slow');">Close</a>
</div>
% elif headphones.CURRENT_VERSION != headphones.LATEST_VERSION and headphones.INSTALL_TYPE != 'win':
<div id="updatebar">
A <a href="https://github.com/${headphones.GIT_USER}/headphones/compare/${headphones.CURRENT_VERSION}...${headphones.LATEST_VERSION}"> newer version</a> is available. You're ${headphones.COMMITS_BEHIND} commits behind. <a href="update">Update</a> or <a href="#" onclick="$('#updatebar').slideUp('slow');">Close</a>
</div>
% endif
<div id="updatebar">
You're running an unknown version of Headphones. <a href="update">Update</a> or
<a href="#" onclick="$('#updatebar').slideUp('slow');">Close</a>
</div>
% elif headphones.CURRENT_VERSION != headphones.LATEST_VERSION and headphones.COMMITS_BEHIND > 0 and headphones.INSTALL_TYPE != 'win':
<div id="updatebar">
A <a href="https://github.com/${headphones.GIT_USER}/headphones/compare/${headphones.CURRENT_VERSION}...${headphones.LATEST_VERSION}"> newer version</a> is available. You're ${headphones.COMMITS_BEHIND} commits behind. <a href="update">Update</a> or <a href="#" onclick="$('#updatebar').slideUp('slow');">Close</a>
</div>
% endif
<header>
<div class="wrapper">
@@ -55,16 +55,16 @@
<li><a href="config" class="config"><i class="fa fa-gear fa-lg"></i></a></li>
</ul>
<div id="searchbar">
<form action="search" method="get">
<input type="text" value="" placeholder="Search" onfocus="if(this.value==this.defaultValue) this.value='';" name="name" />
<i class='fa fa-search mini-icon'></i>
<select name="type">
<option value="artist">Artist</option>
<option value="album">Album</option>
<form action="search" method="get">
<input type="text" value="" placeholder="Search" onfocus="if(this.value==this.defaultValue) this.value='';" name="name" />
<i class='fa fa-search mini-icon'></i>
<select name="type">
<option value="artist">Artist</option>
<option value="album">Album</option>
</select>
<input type="submit" value="Add"/>
</form>
</div>
<input type="submit" value="Add"/>
</form>
</div>
</div>
</header>
@@ -89,6 +89,9 @@
%if version.HEADPHONES_VERSION != 'master':
(${version.HEADPHONES_VERSION})
%endif
%if headphones.GIT_BRANCH != 'master':
(${headphones.GIT_BRANCH})
%endif
</div>
</footer>
<a href="#main" id="toTop"><i class="fa fa-angle-double-up"></i> <span>Back to top</span></a>

View File

@@ -200,6 +200,11 @@
<input type="text" name="torrentblackhole_dir" value="${config['torrentblackhole_dir']}" size="50">
<small>Folder your Download program watches for Torrents</small>
</div>
<div class="row checkbox">
<label>Open Magnet Links</label>
<input type="checkbox" name="open_magnet_links" value="1" ${config['open_magnet_links']}>
<small>Allow Headphones to open magnet links</small>
</div>
</fieldset>
<fieldset id="transmission_options">
<div class="row">
@@ -250,6 +255,12 @@
<label>Keep Files for Seeding</label>
<input type="checkbox" name="keep_torrent_files" value="1" ${config['keep_torrent_files']}>
</div>
<div class="row checkbox">
<label>Prefer</label>
<input type="radio" name="prefer_torrents" id="prefer_torrents_0" value="0" ${config['prefer_torrents_0']}>NZBs
<input type="radio" name="prefer_torrents" id="prefer_torrents_1" value="1" ${config['prefer_torrents_1']}>Torrents
<input type="radio" name="prefer_torrents" id="prefer_torrents_2" value="2" ${config['prefer_torrents_2']}>No Preference
</div>
</fieldset>
</td>
</tr>
@@ -486,7 +497,7 @@
<div class="row">
<label>Required Words</label>
<input type="text" name="required_words" value="${config['required_words']}" size="50">
<small>Results without these words in the title will be filtered out</small>
<small>Results without these words in the title will be filtered out. You can use OR: 'flac OR lossless OR alac, vinyl'</small>
</div>
</fieldset>
</td>
@@ -494,7 +505,12 @@
<fieldset>
<legend>Post-Processing</legend>
<div class="row checkbox left clearfix">
<input type="checkbox" name="move_files" value="1" ${config['move_files']} /><label>Move downloads to Destination Folder</label>
<input type="checkbox" name="move_files" id="move_files" value="1" ${config['move_files']} /><label>Move downloads to Destination Folder</label>
<div id="move_files_options">
<div class="row indent">
<input type="checkbox" name="replace_existing_folders" value="1" ${config['replace_existing_folders']}><label>Replace existing folders?</label>
</div>
</div>
<input type="checkbox" name="rename_files" value="1" ${config['rename_files']} /><label>Rename files</label>
<input type="checkbox" name="correct_metadata" value="1" ${config['correct_metadata']} /><label>Correct metadata</label>
<input type="checkbox" name="cleanup_files" value="1" ${config['cleanup_files']} /><label>Delete leftover files <small>(.m3u, .nfo, .sfv, .nzb, etc.)</small></label>
@@ -528,6 +544,23 @@
<table class="configtable" summary="Notifications">
<tr>
<td>
<fieldset>
<h3>Growl</h3>
<div class="row checkbox">
<input type="checkbox" name="growl_enabled" id="growl" value="1" ${config['growl_enabled']} /><label>Enable Growl Notifications</label>
</div>
<div id="growloptions">
<div class="row">
<label>Growl Host:Port</label><input type="text" name="growl_host" value="${config['growl_host']}" size="30">
</div>
<div class="row">
<label>Growl Password</label><input type="password" name="growl_password" value="${config['growl_password']}" size="30">
</div>
<div class="row checkbox">
<input type="checkbox" name="growl_onsnatch" value="1" ${config['growl_onsnatch']} /><label>Notify on snatch?</label>
</div>
</div>
</fieldset>
<fieldset>
<h3>Prowl</h3>
<div class="row checkbox">
@@ -644,6 +677,19 @@
</div>
</div>
</fieldset>
<fieldset>
<h3>LMS</h3>
<div class="checkbox row">
<input type="checkbox" name="lms_enabled" id="lms" value="1" ${config['lms_enabled']} /><label>Enable Squeezebox Updates</label>
</div>
<div id="lmsoptions">
<div class="row">
<label>LMS Host:Port</label>
<input type="text" name="lms_host" value="${config['lms_host']}" size="30">
<small>e.g. http://localhost:9000. Seperate hosts with commas</small>
</div>
</div>
</fieldset>
</td>
<td>
<fieldset>
@@ -685,6 +731,9 @@
<label>Priority (-1,0, or 1):</label>
<input type="text" name="pushover_priority" value="${config['pushover_priority']}" size="2">
</div>
<div class="row">
<label>API Token (leave blank to use Headphones default):</label><input type="text" name="pushover_apitoken" value="${config['pushover_apitoken']}" size="50">
</div>
</div>
</fieldset>
@@ -792,6 +841,18 @@
<option value="xld" ${xldselect}>xld</option>
</select>
</div>
<div class="row">
<div class="row">
<input type="checkbox" name="encoder_multicore" value="1" ${config['encoder_multicore']}/><label>Enable multi-core</label>
</div>
</div>
<div class="row">
<div class="row">
<label>Multi-core count:</label>
<input type="text" name="encoder_multicore_count" value="${config['encoder_multicore_count']}" size="7">
<small>Set equal to the number of cores, or 0 for auto</small>
</div>
</div>
</fieldset>
<fieldset>
<legend>Audio Properties</legend>
@@ -1177,6 +1238,46 @@
}
});
if ($("#move_files").is(":checked"))
{
$("#move_files_options").show();
}
else
{
$("#move_files_options").hide();
}
$("#move_files").click(function(){
if ($("#move_files").is(":checked"))
{
$("#move_files_options").slideDown();
}
else
{
$("#move_files_options").slideUp();
}
});
if ($("#growl").is(":checked"))
{
$("#growloptions").show();
}
else
{
$("#growloptions").hide();
}
$("#growl").click(function(){
if ($("#growl").is(":checked"))
{
$("#growloptions").slideDown();
}
else
{
$("#growloptions").slideUp();
}
});
if ($("#prowl").is(":checked"))
{
$("#prowloptions").show();
@@ -1217,6 +1318,26 @@
}
});
if ($("#lms").is(":checked"))
{
$("#lmsoptions").show();
}
else
{
$("#lmsoptions").hide();
}
$("#lms").click(function(){
if ($("#lms").is(":checked"))
{
$("#lmsoptions").slideDown();
}
else
{
$("#lmsoptions").slideUp();
}
});
if ($("#plex").is(":checked"))
{
$("#plexoptions").show();

View File

@@ -393,6 +393,9 @@ form .checkbox small {
margin: 0 !important;
width: auto;
}
form .indent input {
margin-left: 15px;
}
ul,
ol {
margin-left: 2em;
@@ -688,7 +691,7 @@ footer {
}
#subhead #subhead_container #subhead_menu {
float: right;
margin-top: 5px;
margin-top: 0px;
position: relative;
z-index: 99;
}
@@ -713,6 +716,11 @@ footer {
color: #FFF;
border-color: #518CC6 #518CC6 #2A65A0;
}
#subhead #subhead_container #back_to_previous_link {
margin-top: 20px;
position: relative;
z-index: 99;
}
div#searchbar {
border-left: 1px solid #FAFAFA;
-moz-box-shadow: -1px 0 0 #e0e0e0;
@@ -769,7 +777,7 @@ div#searchbar .mini-icon {
_height: 302px;
background-color: #FFF;
clear: both;
margin: 30px auto 0;
margin: 60px auto 0;
min-height: 100px;
position: relative;
width: 100%;
@@ -1051,6 +1059,22 @@ div#artistheader h2 a {
text-align: center;
vertical-align: middle;
}
#downloads_table th#title {
text-align: center;
min-width: 500px;
}
#downloads_table th#size {
text-align: center;
min-width: 80px;
}
#downloads_table th#provider {
text-align: center;
min-width: 100px;
}
#downloads_table th#kind {
text-align: center;
min-width: 80px;
}
#history_table {
background-color: #FFF;
font-size: 13px;
@@ -1212,6 +1236,7 @@ div#artistheader h2 a {
position: relative;
top: 7px;
}
#trashcan {margin-top:15px;}
.cloudtag {
font-size: 16px;
}
@@ -1321,6 +1346,15 @@ div#artistheader h2 a {
.clearfix:after {
clear: both;
}
.override-float {
float: none !important;
margin-bottom: 0px !important;
clear: none !important;
display: inline !important;
font-size: 10px;
line-height: 10px;
height: 10px;
}
#album_table th#albumname,
#upcoming_table th#artistname,
#wanted_table th#artistname {

View File

@@ -26,6 +26,7 @@
<th id="size">Size</th>
<th id="status">Status</th>
<th id="action"></th>
<th id="delete"></th>
</tr>
</thead>
<tbody>
@@ -54,6 +55,7 @@
<td id="size">${helpers.bytes_to_mb(item['Size'])}</td>
<td id="status">${item['Status']}</td>
<td id="action">[<a href="#" onclick="doAjaxCall('queueAlbum?AlbumID=${item['AlbumID']}&redirect=history', $(this),'table')" data-success="Retrying download of '${item['Title']}'">retry</a>][<a href="#" onclick="doAjaxCall('queueAlbum?AlbumID=${item['AlbumID']}&new=True&redirect=history',$(this),'table')" data-success="Looking for a new version of '${item['Title']}'">new</a>]</td>
<td id="delete"><a href="#" onclick="doAjaxCall('clearhistory?date_added=${item['DateAdded']}&title=${item['Title']}',$(this),'table')" data-success="${item['Title']} cleared from history"><img src="interfaces/default/images/trashcan.png" height="18" width="18" id="trashcan" title="Clear this item from the history"></a>
</tr>
%endfor
</tbody>

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.2 KiB

View File

@@ -122,7 +122,6 @@
<div class="links">
<a href="#" onclick="doAjaxCall('forceSearch',$(this))" data-success="Checking for wanted albums successful" data-error="Error checking wanted albums"><i class="fa fa-search fa-fw"></i> Force Check for Wanted Albums</a>
<a href="#" onclick="doAjaxCall('forceUpdate',$(this))" data-success="Update active artists successful" data-error="Error forcing update artists"><i class="fa fa-heart fa-fw"></i> Force Update Active Artists [Fast]</a>
<a href="#" onclick="doAjaxCall('forcePostProcess',$(this))" data-success="Post-Processor is being loaded" data-error="Error during Post-Processing"><i class="fa fa-wrench fa-fw"></i> Force Post-Process Albums in Download Folder</a>
<a href="#" onclick="doAjaxCall('checkGithub',$(this))" data-success="Checking for update successful" data-error="Error checking for update"><i class="fa fa-refresh fa-fw"></i> Check for Headphones Updates</a>
<a href="#" id="delete_empty_artists"><i class="fa fa-trash-o fa-fw"></i> Delete empty Artists</a>
<div id="emptyartistdialog" title="Confirm Artist Deletion" style="display:none" class="configtable">
@@ -137,8 +136,27 @@
No empty artists found.
%endif
</div>
<a href="#" onclick="doAjaxCall('forcePostProcess',$(this))" data-success="Post-Processor is being loaded" data-error="Error during Post-Processing"><i class="fa fa-wrench fa-fw"></i> Force Post-Process Albums in Download Folder</a>
</div>
</fieldset>
<form action="forcePostProcess" method="GET">
<fieldset>
<div class="row">
<label>Force Post-Process Albums in Alternate Folder</label>
<input type="text" value="" name="dir" id="dir" size="50" /><input type="submit" />
</div>
</fieldset>
</form>
<form action="forcePostProcess" method="GET">
<fieldset>
<div class="row">
<label>Post-Process Single Folder</label>
<input type="text" value="" name="album_dir" id="album_dir" size="50" /><input type="submit" />
</div>
</fieldset>
</form>
</div>

View File

@@ -82,7 +82,8 @@ API_KEY = None
GIT_PATH = None
GIT_USER = None
GIT_BRANCH =None
GIT_BRANCH = None
DO_NOT_OVERRIDE_GIT_BRANCH = False
INSTALL_TYPE = None
CURRENT_VERSION = None
LATEST_VERSION = None
@@ -114,6 +115,7 @@ ADD_ALBUM_ART = False
ALBUM_ART_FORMAT = None
EMBED_ALBUM_ART = False
EMBED_LYRICS = False
REPLACE_EXISTING_FOLDERS = False
NZB_DOWNLOADER = None # 0: sabnzbd, 1: nzbget, 2: blackhole
TORRENT_DOWNLOADER = None # 0: blackhole, 1: transmission, 2: utorrent
DOWNLOAD_DIR = None
@@ -125,6 +127,8 @@ EXTRAS = None
AUTOWANT_UPCOMING = False
AUTOWANT_ALL = False
KEEP_TORRENT_FILES = False
PREFER_TORRENTS = None # 0: nzbs, 1: torrents, 2: no preference
OPEN_MAGNET_LINKS = False
SEARCH_INTERVAL = 360
LIBRARYSCAN = False
@@ -178,7 +182,7 @@ REQUIRED_WORDS = None
LASTFM_USERNAME = None
LOSSY_MEDIA_FORMATS = ["mp3", "aac", "ogg", "ape", "m4a"]
LOSSY_MEDIA_FORMATS = ["mp3", "aac", "ogg", "ape", "m4a", "asf", "wma"]
LOSSLESS_MEDIA_FORMATS = ["flac"]
MEDIA_FORMATS = LOSSY_MEDIA_FORMATS + LOSSLESS_MEDIA_FORMATS
@@ -218,7 +222,13 @@ ENCODEROUTPUTFORMAT = None
ENCODERQUALITY = None
ENCODERVBRCBR = None
ENCODERLOSSLESS = False
ENCODER_MULTICORE = False
ENCODER_MULTICORE_COUNT = 0
DELETE_LOSSLESS_FILES = False
GROWL_ENABLED = True
GROWL_HOST = None
GROWL_PASSWORD = None
GROWL_ONSNATCH = True
PROWL_ENABLED = True
PROWL_PRIORITY = 1
PROWL_KEYS = None
@@ -229,6 +239,8 @@ XBMC_USERNAME = None
XBMC_PASSWORD = None
XBMC_UPDATE = False
XBMC_NOTIFY = False
LMS_ENABLED = False
LMS_HOST = None
PLEX_ENABLED = False
PLEX_SERVER_HOST = None
PLEX_CLIENT_HOST = None
@@ -248,6 +260,7 @@ PUSHOVER_ENABLED = True
PUSHOVER_PRIORITY = 1
PUSHOVER_KEYS = None
PUSHOVER_ONSNATCH = True
PUSHOVER_APITOKEN = None
PUSHBULLET_ENABLED = True
PUSHBULLET_APIKEY = None
PUSHBULLET_DEVICEID = None
@@ -296,7 +309,7 @@ def check_setting_int(config, cfg_name, item_name, def_val):
except:
config[cfg_name] = {}
config[cfg_name][item_name] = my_val
logger.debug(item_name + " -> " + str(my_val))
logger.debug("%s -> %s", item_name, my_val)
return my_val
################################################################################
@@ -313,10 +326,7 @@ def check_setting_str(config, cfg_name, item_name, def_val, log=True):
config[cfg_name] = {}
config[cfg_name][item_name] = my_val
if log:
logger.debug(item_name + " -> " + my_val)
else:
logger.debug(item_name + " -> ******")
logger.debug("%s -> %s", item_name, my_val if log else "******")
return my_val
def initialize():
@@ -324,11 +334,11 @@ def initialize():
with INIT_LOCK:
global __INITIALIZED__, FULL_PATH, PROG_DIR, VERBOSE, DAEMON, SYS_PLATFORM, DATA_DIR, CONFIG_FILE, CFG, CONFIG_VERSION, LOG_DIR, CACHE_DIR, \
HTTP_PORT, HTTP_HOST, HTTP_USERNAME, HTTP_PASSWORD, HTTP_ROOT, HTTP_PROXY, LAUNCH_BROWSER, API_ENABLED, API_KEY, GIT_PATH, GIT_USER, GIT_BRANCH, \
HTTP_PORT, HTTP_HOST, HTTP_USERNAME, HTTP_PASSWORD, HTTP_ROOT, HTTP_PROXY, LAUNCH_BROWSER, API_ENABLED, API_KEY, GIT_PATH, GIT_USER, GIT_BRANCH, DO_NOT_OVERRIDE_GIT_BRANCH, \
CURRENT_VERSION, LATEST_VERSION, CHECK_GITHUB, CHECK_GITHUB_ON_STARTUP, CHECK_GITHUB_INTERVAL, MUSIC_DIR, DESTINATION_DIR, \
LOSSLESS_DESTINATION_DIR, PREFERRED_QUALITY, PREFERRED_BITRATE, DETECT_BITRATE, ADD_ARTISTS, CORRECT_METADATA, MOVE_FILES, \
RENAME_FILES, FOLDER_FORMAT, FILE_FORMAT, FILE_UNDERSCORES, CLEANUP_FILES, INCLUDE_EXTRAS, EXTRAS, AUTOWANT_UPCOMING, AUTOWANT_ALL, KEEP_TORRENT_FILES, \
ADD_ALBUM_ART, ALBUM_ART_FORMAT, EMBED_ALBUM_ART, EMBED_LYRICS, DOWNLOAD_DIR, BLACKHOLE, BLACKHOLE_DIR, USENET_RETENTION, SEARCH_INTERVAL, \
RENAME_FILES, FOLDER_FORMAT, FILE_FORMAT, FILE_UNDERSCORES, CLEANUP_FILES, INCLUDE_EXTRAS, EXTRAS, AUTOWANT_UPCOMING, AUTOWANT_ALL, KEEP_TORRENT_FILES, PREFER_TORRENTS, OPEN_MAGNET_LINKS, \
ADD_ALBUM_ART, ALBUM_ART_FORMAT, EMBED_ALBUM_ART, EMBED_LYRICS, REPLACE_EXISTING_FOLDERS, DOWNLOAD_DIR, BLACKHOLE, BLACKHOLE_DIR, USENET_RETENTION, SEARCH_INTERVAL, \
TORRENTBLACKHOLE_DIR, NUMBEROFSEEDERS, ISOHUNT, KAT, PIRATEBAY, PIRATEBAY_PROXY_URL, MININOVA, WAFFLES, WAFFLES_UID, WAFFLES_PASSKEY, \
RUTRACKER, RUTRACKER_USER, RUTRACKER_PASSWORD, WHATCD, WHATCD_USERNAME, WHATCD_PASSWORD, DOWNLOAD_TORRENT_DIR, \
LIBRARYSCAN, LIBRARYSCAN_INTERVAL, DOWNLOAD_SCAN_INTERVAL, UPDATE_DB_INTERVAL, MB_IGNORE_AGE, SAB_HOST, SAB_USERNAME, SAB_PASSWORD, SAB_APIKEY, SAB_CATEGORY, \
@@ -337,12 +347,12 @@ def initialize():
NZBSORG, NZBSORG_UID, NZBSORG_HASH, NZBSRUS, NZBSRUS_UID, NZBSRUS_APIKEY, OMGWTFNZBS, OMGWTFNZBS_UID, OMGWTFNZBS_APIKEY, \
NZB_DOWNLOADER, TORRENT_DOWNLOADER, PREFERRED_WORDS, REQUIRED_WORDS, IGNORED_WORDS, LASTFM_USERNAME, \
INTERFACE, FOLDER_PERMISSIONS, FILE_PERMISSIONS, ENCODERFOLDER, ENCODER_PATH, ENCODER, XLDPROFILE, BITRATE, SAMPLINGFREQUENCY, \
MUSIC_ENCODER, ADVANCEDENCODER, ENCODEROUTPUTFORMAT, ENCODERQUALITY, ENCODERVBRCBR, ENCODERLOSSLESS, DELETE_LOSSLESS_FILES, \
PROWL_ENABLED, PROWL_PRIORITY, PROWL_KEYS, PROWL_ONSNATCH, PUSHOVER_ENABLED, PUSHOVER_PRIORITY, PUSHOVER_KEYS, PUSHOVER_ONSNATCH, MIRRORLIST, \
MUSIC_ENCODER, ADVANCEDENCODER, ENCODEROUTPUTFORMAT, ENCODERQUALITY, ENCODERVBRCBR, ENCODERLOSSLESS, ENCODER_MULTICORE, ENCODER_MULTICORE_COUNT, DELETE_LOSSLESS_FILES, \
GROWL_ENABLED, GROWL_HOST, GROWL_PASSWORD, GROWL_ONSNATCH, PROWL_ENABLED, PROWL_PRIORITY, PROWL_KEYS, PROWL_ONSNATCH, PUSHOVER_ENABLED, PUSHOVER_PRIORITY, PUSHOVER_KEYS, PUSHOVER_ONSNATCH, PUSHOVER_APITOKEN, MIRRORLIST, \
TWITTER_ENABLED, TWITTER_ONSNATCH, TWITTER_USERNAME, TWITTER_PASSWORD, TWITTER_PREFIX, \
PUSHBULLET_ENABLED, PUSHBULLET_APIKEY, PUSHBULLET_DEVICEID, PUSHBULLET_ONSNATCH, \
MIRROR, CUSTOMHOST, CUSTOMPORT, CUSTOMSLEEP, HPUSER, HPPASS, XBMC_ENABLED, XBMC_HOST, XBMC_USERNAME, XBMC_PASSWORD, XBMC_UPDATE, \
XBMC_NOTIFY, NMA_ENABLED, NMA_APIKEY, NMA_PRIORITY, NMA_ONSNATCH, SYNOINDEX_ENABLED, ALBUM_COMPLETION_PCT, PREFERRED_BITRATE_HIGH_BUFFER, \
XBMC_NOTIFY, LMS_ENABLED, LMS_HOST, NMA_ENABLED, NMA_APIKEY, NMA_PRIORITY, NMA_ONSNATCH, SYNOINDEX_ENABLED, ALBUM_COMPLETION_PCT, PREFERRED_BITRATE_HIGH_BUFFER, \
PREFERRED_BITRATE_LOW_BUFFER, PREFERRED_BITRATE_ALLOW_LOSSLESS, CACHE_SIZEMB, JOURNAL_MODE, UMASK, ENABLE_HTTPS, HTTPS_CERT, HTTPS_KEY, \
PLEX_ENABLED, PLEX_SERVER_HOST, PLEX_CLIENT_HOST, PLEX_USERNAME, PLEX_PASSWORD, PLEX_UPDATE, PLEX_NOTIFY, PUSHALOT_ENABLED, PUSHALOT_APIKEY, \
PUSHALOT_ONSNATCH, SONGKICK_ENABLED, SONGKICK_APIKEY, SONGKICK_LOCATION, SONGKICK_FILTER_ENABLED
@@ -365,10 +375,12 @@ def initialize():
CheckSection('Waffles')
CheckSection('Rutracker')
CheckSection('What.cd')
CheckSection('Growl')
CheckSection('Prowl')
CheckSection('Pushover')
CheckSection('PushBullet')
CheckSection('XBMC')
CheckSection('LMS')
CheckSection('Plex')
CheckSection('NMA')
CheckSection('Pushalot')
@@ -402,6 +414,7 @@ def initialize():
GIT_PATH = check_setting_str(CFG, 'General', 'git_path', '')
GIT_USER = check_setting_str(CFG, 'General', 'git_user', 'rembo10')
GIT_BRANCH = check_setting_str(CFG, 'General', 'git_branch', 'master')
DO_NOT_OVERRIDE_GIT_BRANCH = check_setting_int(CFG, 'General', 'do_not_override_git_branch', 0)
LOG_DIR = check_setting_str(CFG, 'General', 'log_dir', '')
CACHE_DIR = check_setting_str(CFG, 'General', 'cache_dir', '')
@@ -430,6 +443,7 @@ def initialize():
ALBUM_ART_FORMAT = check_setting_str(CFG, 'General', 'album_art_format', 'folder')
EMBED_ALBUM_ART = bool(check_setting_int(CFG, 'General', 'embed_album_art', 0))
EMBED_LYRICS = bool(check_setting_int(CFG, 'General', 'embed_lyrics', 0))
REPLACE_EXISTING_FOLDERS = bool(check_setting_int(CFG, 'General', 'replace_existing_folders', 0))
NZB_DOWNLOADER = check_setting_int(CFG, 'General', 'nzb_downloader', 0)
TORRENT_DOWNLOADER = check_setting_int(CFG, 'General', 'torrent_downloader', 0)
DOWNLOAD_DIR = check_setting_str(CFG, 'General', 'download_dir', '')
@@ -441,6 +455,8 @@ def initialize():
AUTOWANT_UPCOMING = bool(check_setting_int(CFG, 'General', 'autowant_upcoming', 1))
AUTOWANT_ALL = bool(check_setting_int(CFG, 'General', 'autowant_all', 0))
KEEP_TORRENT_FILES = bool(check_setting_int(CFG, 'General', 'keep_torrent_files', 0))
PREFER_TORRENTS = check_setting_int(CFG, 'General', 'prefer_torrents', 0)
OPEN_MAGNET_LINKS = bool(check_setting_int(CFG, 'General', 'open_magnet_links', 0))
SEARCH_INTERVAL = check_setting_int(CFG, 'General', 'search_interval', 1440)
LIBRARYSCAN = bool(check_setting_int(CFG, 'General', 'libraryscan', 1))
@@ -534,8 +550,15 @@ def initialize():
ENCODERQUALITY = check_setting_int(CFG, 'General', 'encoderquality', 2)
ENCODERVBRCBR = check_setting_str(CFG, 'General', 'encodervbrcbr', 'cbr')
ENCODERLOSSLESS = bool(check_setting_int(CFG, 'General', 'encoderlossless', 1))
ENCODER_MULTICORE = bool(check_setting_int(CFG, 'General', 'encoder_multicore', 0))
ENCODER_MULTICORE_COUNT = max(0, check_setting_int(CFG, 'General', 'encoder_multicore_count', 0))
DELETE_LOSSLESS_FILES = bool(check_setting_int(CFG, 'General', 'delete_lossless_files', 1))
GROWL_ENABLED = bool(check_setting_int(CFG, 'Growl', 'growl_enabled', 0))
GROWL_HOST = check_setting_str(CFG, 'Growl', 'growl_host', '')
GROWL_PASSWORD = check_setting_str(CFG, 'Growl', 'growl_password', '')
GROWL_ONSNATCH = bool(check_setting_int(CFG, 'Growl', 'growl_onsnatch', 0))
PROWL_ENABLED = bool(check_setting_int(CFG, 'Prowl', 'prowl_enabled', 0))
PROWL_KEYS = check_setting_str(CFG, 'Prowl', 'prowl_keys', '')
PROWL_ONSNATCH = bool(check_setting_int(CFG, 'Prowl', 'prowl_onsnatch', 0))
@@ -548,6 +571,9 @@ def initialize():
XBMC_UPDATE = bool(check_setting_int(CFG, 'XBMC', 'xbmc_update', 0))
XBMC_NOTIFY = bool(check_setting_int(CFG, 'XBMC', 'xbmc_notify', 0))
LMS_ENABLED = bool(check_setting_int(CFG, 'LMS', 'lms_enabled', 0))
LMS_HOST = check_setting_str(CFG, 'LMS', 'lms_host', '')
PLEX_ENABLED = bool(check_setting_int(CFG, 'Plex', 'plex_enabled', 0))
PLEX_SERVER_HOST = check_setting_str(CFG, 'Plex', 'plex_server_host', '')
PLEX_CLIENT_HOST = check_setting_str(CFG, 'Plex', 'plex_client_host', '')
@@ -571,6 +597,7 @@ def initialize():
PUSHOVER_KEYS = check_setting_str(CFG, 'Pushover', 'pushover_keys', '')
PUSHOVER_ONSNATCH = bool(check_setting_int(CFG, 'Pushover', 'pushover_onsnatch', 0))
PUSHOVER_PRIORITY = check_setting_int(CFG, 'Pushover', 'pushover_priority', 0)
PUSHOVER_APITOKEN = check_setting_str(CFG, 'Pushover', 'pushover_apitoken', '')
PUSHBULLET_ENABLED = bool(check_setting_int(CFG, 'PushBullet', 'pushbullet_enabled', 0))
PUSHBULLET_APIKEY = check_setting_str(CFG, 'PushBullet', 'pushbullet_apikey', '')
@@ -668,10 +695,10 @@ def initialize():
os.makedirs(LOG_DIR)
except OSError:
if VERBOSE:
print 'Unable to create the log directory. Logging to screen only.'
sys.stderr.write('Unable to create the log directory. Logging to screen only.\n')
# Start the logger, silence console logging if we need to
logger.headphones_log.initLogger(verbose=VERBOSE)
logger.initLogger(verbose=VERBOSE)
if not CACHE_DIR:
# Put the cache dir in the data dir for now
@@ -680,7 +707,7 @@ def initialize():
try:
os.makedirs(CACHE_DIR)
except OSError:
logger.error('Could not create cache dir. Check permissions of datadir: ' + DATA_DIR)
logger.error('Could not create cache dir. Check permissions of datadir: %s', DATA_DIR)
# Sanity check for search interval. Set it to at least 6 hours
if SEARCH_INTERVAL < 360:
@@ -692,17 +719,18 @@ def initialize():
try:
dbcheck()
except Exception, e:
logger.error("Can't connect to the database: %s" % e)
logger.error("Can't connect to the database: %s", e)
# Get the currently installed version - returns None, 'win32' or the git hash
# Also sets INSTALL_TYPE variable to 'win', 'git' or 'source'
CURRENT_VERSION = versioncheck.getVersion()
CURRENT_VERSION, GIT_BRANCH = versioncheck.getVersion()
# Check for new versions
if CHECK_GITHUB_ON_STARTUP:
try:
LATEST_VERSION = versioncheck.checkGithub()
except:
logger.exception("Unhandled exception")
LATEST_VERSION = CURRENT_VERSION
else:
LATEST_VERSION = CURRENT_VERSION
@@ -729,7 +757,7 @@ def daemonize():
if pid != 0:
sys.exit(0)
except OSError, e:
raise RuntimeError("1st fork failed: %s [%d]" % (e.strerror, e.errno))
raise RuntimeError("1st fork failed: %s [%d]", e.strerror, e.errno)
os.setsid()
@@ -743,7 +771,7 @@ def daemonize():
if pid != 0:
sys.exit(0)
except OSError, e:
raise RuntimeError("2nd fork failed: %s [%d]" % (e.strerror, e.errno))
raise RuntimeError("2nd fork failed: %s [%d]", e.strerror, e.errno)
dev_null = file('/dev/null', 'r')
os.dup2(dev_null.fileno(), sys.stdin.fileno())
@@ -756,12 +784,13 @@ def daemonize():
os.dup2(so.fileno(), sys.stdout.fileno())
os.dup2(se.fileno(), sys.stderr.fileno())
pid = str(os.getpid())
logger.info('Daemonized to PID: %s' % pid)
pid = os.getpid()
logger.info('Daemonized to PID: %d', pid)
if CREATEPID:
logger.info("Writing PID " + pid + " to " + str(PIDFILE))
file(PIDFILE, 'w').write("%s\n" % pid)
logger.info("Writing PID %d to %s", pid, PIDFILE)
with file(PIDFILE, 'w') as fp:
fp.write("%s\n" % pid)
def launch_browser(host, port, root):
@@ -776,7 +805,7 @@ def launch_browser(host, port, root):
try:
webbrowser.open('%s://%s:%i%s' % (protocol, host, port, root))
except Exception, e:
logger.error('Could not launch browser: %s' % e)
logger.error('Could not launch browser: %s', e)
def config_write():
@@ -802,6 +831,7 @@ def config_write():
new_config['General']['git_path'] = GIT_PATH
new_config['General']['git_user'] = GIT_USER
new_config['General']['git_branch'] = GIT_BRANCH
new_config['General']['do_not_override_git_branch'] = int(DO_NOT_OVERRIDE_GIT_BRANCH)
new_config['General']['check_github'] = int(CHECK_GITHUB)
new_config['General']['check_github_on_startup'] = int(CHECK_GITHUB_ON_STARTUP)
@@ -828,6 +858,7 @@ def config_write():
new_config['General']['album_art_format'] = ALBUM_ART_FORMAT
new_config['General']['embed_album_art'] = int(EMBED_ALBUM_ART)
new_config['General']['embed_lyrics'] = int(EMBED_LYRICS)
new_config['General']['replace_existing_folders'] = int(REPLACE_EXISTING_FOLDERS)
new_config['General']['nzb_downloader'] = NZB_DOWNLOADER
new_config['General']['torrent_downloader'] = TORRENT_DOWNLOADER
new_config['General']['download_dir'] = DOWNLOAD_DIR
@@ -838,6 +869,8 @@ def config_write():
new_config['General']['autowant_upcoming'] = int(AUTOWANT_UPCOMING)
new_config['General']['autowant_all'] = int(AUTOWANT_ALL)
new_config['General']['keep_torrent_files'] = int(KEEP_TORRENT_FILES)
new_config['General']['prefer_torrents'] = PREFER_TORRENTS
new_config['General']['open_magnet_links'] = OPEN_MAGNET_LINKS
new_config['General']['numberofseeders'] = NUMBEROFSEEDERS
new_config['General']['torrentblackhole_dir'] = TORRENTBLACKHOLE_DIR
@@ -928,6 +961,12 @@ def config_write():
new_config['General']['ignored_words'] = IGNORED_WORDS
new_config['General']['required_words'] = REQUIRED_WORDS
new_config['Growl'] = {}
new_config['Growl']['growl_enabled'] = int(GROWL_ENABLED)
new_config['Growl']['growl_host'] = GROWL_HOST
new_config['Growl']['growl_password'] = GROWL_PASSWORD
new_config['Growl']['growl_onsnatch'] = int(GROWL_ONSNATCH)
new_config['Prowl'] = {}
new_config['Prowl']['prowl_enabled'] = int(PROWL_ENABLED)
new_config['Prowl']['prowl_keys'] = PROWL_KEYS
@@ -942,6 +981,10 @@ def config_write():
new_config['XBMC']['xbmc_update'] = int(XBMC_UPDATE)
new_config['XBMC']['xbmc_notify'] = int(XBMC_NOTIFY)
new_config['LMS'] = {}
new_config['LMS']['lms_enabled'] = int(LMS_ENABLED)
new_config['LMS']['lms_host'] = LMS_HOST
new_config['Plex'] = {}
new_config['Plex']['plex_enabled'] = int(PLEX_ENABLED)
new_config['Plex']['plex_server_host'] = PLEX_SERVER_HOST
@@ -967,6 +1010,7 @@ def config_write():
new_config['Pushover']['pushover_keys'] = PUSHOVER_KEYS
new_config['Pushover']['pushover_onsnatch'] = int(PUSHOVER_ONSNATCH)
new_config['Pushover']['pushover_priority'] = int(PUSHOVER_PRIORITY)
new_config['Pushover']['pushover_apitoken'] = PUSHOVER_APITOKEN
new_config['PushBullet'] = {}
new_config['PushBullet']['pushbullet_enabled'] = int(PUSHBULLET_ENABLED)
@@ -1006,6 +1050,8 @@ def config_write():
new_config['General']['encoderquality'] = ENCODERQUALITY
new_config['General']['encodervbrcbr'] = ENCODERVBRCBR
new_config['General']['encoderlossless'] = int(ENCODERLOSSLESS)
new_config['General']['encoder_multicore'] = int(ENCODER_MULTICORE)
new_config['General']['encoder_multicore_count'] = int(ENCODER_MULTICORE_COUNT)
new_config['General']['delete_lossless_files'] = int(DELETE_LOSSLESS_FILES)
new_config['General']['mirror'] = MIRROR
@@ -1048,7 +1094,7 @@ def start():
def sig_handler(signum=None, frame=None):
if type(signum) != type(None):
logger.info("Signal %i caught, saving and exiting..." % int(signum))
logger.info("Signal %i caught, saving and exiting...", signum)
shutdown()
def dbcheck():
@@ -1267,10 +1313,10 @@ def shutdown(restart=False, update=False):
try:
versioncheck.update()
except Exception, e:
logger.warn('Headphones failed to update: %s. Restarting.' % e)
logger.warn('Headphones failed to update: %s. Restarting.', e)
if CREATEPID :
logger.info ('Removing pidfile %s' % PIDFILE)
logger.info ('Removing pidfile %s', PIDFILE)
os.remove(PIDFILE)
if restart:
@@ -1279,7 +1325,7 @@ def shutdown(restart=False, update=False):
popen_list += ARGS
if '--nolaunch' not in popen_list:
popen_list += ['--nolaunch']
logger.info('Restarting Headphones with ' + str(popen_list))
logger.info('Restarting Headphones with %s', popen_list)
subprocess.Popen(popen_list, cwd=os.getcwd())
os._exit(0)

View File

@@ -13,39 +13,30 @@
# You should have received a copy of the GNU General Public License
# along with Headphones. If not, see <http://www.gnu.org/licenses/>.
import urllib2
from headphones import db
from headphones import request, db
def getAlbumArt(albumid):
myDB = db.DBConnection()
asin = myDB.action('SELECT AlbumASIN from albums WHERE AlbumID=?', [albumid]).fetchone()[0]
if not asin:
return None
url = 'http://ec1.images-amazon.com/images/P/%s.01.LZZZZZZZ.jpg' % asin
return url
if asin:
return 'http://ec1.images-amazon.com/images/P/%s.01.LZZZZZZZ.jpg' % asin
def getCachedArt(albumid):
from headphones import cache
c = cache.Cache()
artwork_path = c.get_artwork_from_cache(AlbumID=albumid)
if not artwork_path:
return None
return
if artwork_path.startswith('http://'):
try:
artwork = urllib2.urlopen(artwork_path, timeout=20).read()
return artwork
except:
logger.warn("Unable to open url: " + artwork_path)
return None
artwork = request.request_content(artwork_path, timeout=20)
if not artwork:
logger.warn("Unable to open url: %s", artwork_path)
return
else:
artwork = open(artwork_path, "r").read()
return artwork
with open(artwork_path, "r") as fp:
return fp.read()

View File

@@ -24,7 +24,8 @@ import copy
cmd_list = [ 'getIndex', 'getArtist', 'getAlbum', 'getUpcoming', 'getWanted', 'getSimilar', 'getHistory', 'getLogs',
'findArtist', 'findAlbum', 'addArtist', 'delArtist', 'pauseArtist', 'resumeArtist', 'refreshArtist',
'addAlbum', 'queueAlbum', 'unqueueAlbum', 'forceSearch', 'forceProcess', 'getVersion', 'checkGithub',
'shutdown', 'restart', 'update', 'getArtistArt', 'getAlbumArt', 'getArtistInfo', 'getAlbumInfo', 'getArtistThumb', 'getAlbumThumb']
'shutdown', 'restart', 'update', 'getArtistArt', 'getAlbumArt', 'getArtistInfo', 'getAlbumInfo',
'getArtistThumb', 'getAlbumThumb', 'choose_specific_download', 'download_specific_release']
class Api(object):
@@ -78,8 +79,8 @@ class Api(object):
def fetchData(self):
if self.data == 'OK':
logger.info('Recieved API command: ' + self.cmd)
if self.data == 'OK':
logger.info('Recieved API command: %s', self.cmd)
methodToCall = getattr(self, "_" + self.cmd)
result = methodToCall(**self.kwargs)
if 'callback' not in self.kwargs:
@@ -306,7 +307,10 @@ class Api(object):
searcher.searchforalbum()
def _forceProcess(self, **kwargs):
postprocessor.forcePostProcess()
self.dir = None
if 'dir' in kwargs:
self.dir = kwargs['dir']
postprocessor.forcePostProcess(self.dir)
def _getVersion(self, **kwargs):
self.data = {
@@ -389,3 +393,65 @@ class Api(object):
self.id = kwargs['id']
self.data = cache.getThumb(AlbumID=self.id)
def _choose_specific_download(self, **kwargs):
if 'id' not in kwargs:
self.data = 'Missing parameter: id'
return
else:
self.id = kwargs['id']
results = searcher.searchforalbum(self.id, choose_specific_download=True)
results_as_dicts = []
for result in results:
result_dict = {
'title':result[0],
'size':result[1],
'url':result[2],
'provider':result[3],
'kind':result[4]
}
results_as_dicts.append(result_dict)
self.data = results_as_dicts
def _download_specific_release(self, **kwargs):
expected_kwargs =['id', 'title','size','url','provider','kind']
for kwarg in expected_kwargs:
if kwarg not in kwargs:
self.data = 'Missing parameter: ' + kwarg
return self.data
title = kwargs['title']
size = kwargs['size']
url = kwargs['url']
provider = kwargs['provider']
kind = kwargs['kind']
id = kwargs['id']
for kwarg in expected_kwargs:
del kwargs[kwarg]
# Handle situations where the torrent url contains arguments that are parsed
if kwargs:
import urllib, urllib2
url = urllib2.quote(url, safe=":?/=&") + '&' + urllib.urlencode(kwargs)
try:
result = [(title,int(size),url,provider,kind)]
except ValueError:
result = [(title,float(size),url,provider,kind)]
logger.info(u"Making sure we can download the chosen result")
(data, bestqual) = searcher.preprocess(result)
if data and bestqual:
myDB = db.DBConnection()
album = myDB.action('SELECT * from albums WHERE AlbumID=?', [id]).fetchone()
searcher.send_to_downloader(data, bestqual, album)

View File

@@ -14,53 +14,52 @@
# along with Headphones. If not, see <http://www.gnu.org/licenses/>.
import os
import glob, urllib, urllib2
import lib.simplejson as simplejson
import glob
import urllib
import headphones
from headphones import db, helpers, logger
from headphones import db, helpers, logger, lastfm, request
lastfm_apikey = "690e1ed3bc00bc91804cd8f7fe5ed6d4"
class Cache(object):
"""
This class deals with getting, storing and serving up artwork (album
This class deals with getting, storing and serving up artwork (album
art, artist images, etc) and info/descriptions (album info, artist descrptions)
to and from the cache folder. This can be called from within a web interface,
to and from the cache folder. This can be called from within a web interface,
for example, using the helper functions getInfo(id) and getArtwork(id), to utilize the cached
images rather than having to retrieve them every time the page is reloaded.
So you can call cache.getArtwork(id) which will return an absolute path
to the image file on the local machine, or if the cache directory
doesn't exist, or can not be written to, it will return a url to the image.
Call cache.getInfo(id) to grab the artist/album info; will return the text description
The basic format for art in the cache is <musicbrainzid>.<date>.<ext>
and for info it is <musicbrainzid>.<date>.txt
"""
path_to_art_cache = os.path.join(headphones.CACHE_DIR, 'artwork')
id = None
id_type = None # 'artist' or 'album' - set automatically depending on whether ArtistID or AlbumID is passed
query_type = None # 'artwork','thumb' or 'info' - set automatically
artwork_files = []
thumb_files = []
artwork_errors = False
artwork_url = None
thumb_errors = False
thumb_url = None
info_summary = None
info_content = None
def __init__(self):
pass
def _findfilesstartingwith(self,pattern,folder):
@@ -70,7 +69,7 @@ class Cache(object):
if fname.startswith(pattern):
files.append(os.path.join(folder,fname))
return files
def _exists(self, type):
self.artwork_files = []
self.thumb_files = []
@@ -93,53 +92,53 @@ class Cache(object):
# There's probably a better way to do this
split_date = date.split('-')
days_old = int(split_date[0])*365 + int(split_date[1])*30 + int(split_date[2])
return days_old
def _is_current(self, filename=None, date=None):
if filename:
base_filename = os.path.basename(filename)
date = base_filename.split('.')[1]
# Calculate how old the cached file is based on todays date & file date stamp
# helpers.today() returns todays date in yyyy-mm-dd format
if self._get_age(helpers.today()) - self._get_age(date) < 30:
return True
else:
return False
def _get_thumb_url(self, data):
thumb_url = None
try:
images = data[self.id_type]['image']
except KeyError:
return None
for image in images:
if image['size'] == 'medium':
thumb_url = image['#text']
break
return thumb_url
def get_artwork_from_cache(self, ArtistID=None, AlbumID=None):
'''
Pass a musicbrainz id to this function (either ArtistID or AlbumID)
'''
self.query_type = 'artwork'
if ArtistID:
self.id = ArtistID
self.id_type = 'artist'
else:
self.id = AlbumID
self.id_type = 'album'
if self._exists('artwork') and self._is_current(filename=self.artwork_files[0]):
return self.artwork_files[0]
else:
@@ -151,21 +150,21 @@ class Cache(object):
return self.artwork_files[0]
else:
return None
def get_thumb_from_cache(self, ArtistID=None, AlbumID=None):
'''
Pass a musicbrainz id to this function (either ArtistID or AlbumID)
'''
self.query_type = 'thumb'
if ArtistID:
self.id = ArtistID
self.id_type = 'artist'
else:
self.id = AlbumID
self.id_type = 'album'
if self._exists('thumb') and self._is_current(filename=self.thumb_files[0]):
return self.thumb_files[0]
else:
@@ -177,13 +176,13 @@ class Cache(object):
return self.thumb_files[0]
else:
return None
def get_info_from_cache(self, ArtistID=None, AlbumID=None):
self.query_type = 'info'
myDB = db.DBConnection()
if ArtistID:
if ArtistID:
self.id = ArtistID
self.id_type = 'artist'
db_info = myDB.action('SELECT Summary, Content, LastUpdated FROM descriptions WHERE ArtistID=?', [self.id]).fetchone()
@@ -193,7 +192,7 @@ class Cache(object):
db_info = myDB.action('SELECT Summary, Content, LastUpdated FROM descriptions WHERE ReleaseGroupID=?', [self.id]).fetchone()
if not db_info or not db_info['LastUpdated'] or not self._is_current(date=db_info['LastUpdated']):
self._update_cache()
info_dict = { 'Summary' : self.info_summary, 'Content' : self.info_content }
return info_dict
@@ -201,188 +200,114 @@ class Cache(object):
else:
info_dict = { 'Summary' : db_info['Summary'], 'Content' : db_info['Content'] }
return info_dict
def get_image_links(self, ArtistID=None, AlbumID=None):
'''
Here we're just going to open up the last.fm url, grab the image links and return them
Won't save any image urls, or save the artwork in the cache. Useful for search results, etc.
'''
if ArtistID:
self.id_type = 'artist'
params = { "method": "artist.getInfo",
"api_key": lastfm_apikey,
"mbid": ArtistID,
"format": "json"
}
url = "http://ws.audioscrobbler.com/2.0/?" + urllib.urlencode(params)
logger.debug('Retrieving artist information from: ' + url)
try:
result = urllib2.urlopen(url, timeout=20).read()
except:
logger.warn('Could not open url: ' + url)
return
if result:
try:
data = simplejson.JSONDecoder().decode(result)
except:
logger.warn('Could not parse data from url: ' + url)
return
try:
image_url = data['artist']['image'][-1]['#text']
except Exception:
logger.debug('No artist image found on url: ' + url)
image_url = None
thumb_url = self._get_thumb_url(data)
if not thumb_url:
logger.debug('No artist thumbnail image found on url: ' + url)
else:
self.id_type = 'album'
params = { "method": "album.getInfo",
"api_key": lastfm_apikey,
"mbid": AlbumID,
"format": "json"
}
url = "http://ws.audioscrobbler.com/2.0/?" + urllib.urlencode(params)
logger.debug('Retrieving album information from: ' + url)
try:
result = urllib2.urlopen(url, timeout=20).read()
except:
logger.warn('Could not open url: ' + url)
self.id_type = 'artist'
data = lastfm.request_lastfm("artist.getinfo", mbid=ArtistID, api_key=lastfm_apikey)
if not data:
return
if result:
try:
data = simplejson.JSONDecoder().decode(result)
except:
logger.warn('Could not parse data from url: ' + url)
return
try:
image_url = data['artist']['image'][-1]['#text']
except Exception:
logger.debug('No artist image found on url: ' + url)
image_url = None
thumb_url = self._get_thumb_url(data)
if not thumb_url:
logger.debug('No artist thumbnail image found on url: ' + url)
image_dict = {'artwork' : image_url, 'thumbnail' : thumb_url }
return image_dict
try:
image_url = data['artist']['image'][-1]['#text']
except Exception:
logger.debug('No artist image found')
image_url = None
thumb_url = self._get_thumb_url(data)
if not thumb_url:
logger.debug('No artist thumbnail image found')
else:
self.id_type = 'album'
data = lastfm.request_lastfm("album.getinfo", mbid=AlbumID, api_key=lastfm_apikey)
if not data:
return
try:
image_url = data['artist']['image'][-1]['#text']
except Exception:
logger.debug('No artist image found')
image_url = None
thumb_url = self._get_thumb_url(data)
if not thumb_url:
logger.debug('No artist thumbnail image found')
return {'artwork' : image_url, 'thumbnail' : thumb_url }
def _update_cache(self):
'''
Since we call the same url for both info and artwork, we'll update both at the same time
'''
myDB = db.DBConnection()
# Since lastfm uses release ids rather than release group ids for albums, we have to do a artist + album search for albums
if self.id_type == 'artist':
params = { "method": "artist.getInfo",
"api_key": lastfm_apikey,
"mbid": self.id,
"format": "json"
}
url = "http://ws.audioscrobbler.com/2.0/?" + urllib.urlencode(params)
logger.debug('Retrieving artist information from: ' + url)
try:
result = urllib2.urlopen(url, timeout=20).read()
except:
logger.warn('Could not open url: ' + url)
data = lastfm.request_lastfm("artist.getinfo", mbid=self.id, api_key=lastfm_apikey)
if not data:
return
if result:
try:
data = simplejson.JSONDecoder().decode(result)
except:
logger.warn('Could not parse data from url: ' + url)
return
try:
self.info_summary = data['artist']['bio']['summary']
except Exception:
logger.debug('No artist bio summary found on url: ' + url)
self.info_summary = None
try:
self.info_content = data['artist']['bio']['content']
except Exception:
logger.debug('No artist bio found on url: ' + url)
self.info_content = None
try:
image_url = data['artist']['image'][-1]['#text']
except Exception:
logger.debug('No artist image found on url: ' + url)
image_url = None
thumb_url = self._get_thumb_url(data)
if not thumb_url:
logger.debug('No artist thumbnail image found on url: ' + url)
try:
self.info_summary = data['artist']['bio']['summary']
except Exception:
logger.debug('No artist bio summary found')
self.info_summary = None
try:
self.info_content = data['artist']['bio']['content']
except Exception:
logger.debug('No artist bio found')
self.info_content = None
try:
image_url = data['artist']['image'][-1]['#text']
except Exception:
logger.debug('No artist image found')
image_url = None
thumb_url = self._get_thumb_url(data)
if not thumb_url:
logger.debug('No artist thumbnail image found')
else:
dbartist = myDB.action('SELECT ArtistName, AlbumTitle FROM albums WHERE AlbumID=?', [self.id]).fetchone()
params = { "method": "album.getInfo",
"api_key": lastfm_apikey,
"artist": dbartist['ArtistName'].encode('utf-8'),
"album": dbartist['AlbumTitle'].encode('utf-8'),
"format": "json"
}
url = "http://ws.audioscrobbler.com/2.0/?" + urllib.urlencode(params)
logger.debug('Retrieving artist information from: ' + url)
try:
result = urllib2.urlopen(url, timeout=20).read()
except:
logger.warn('Could not open url: ' + url)
return
if result:
try:
data = simplejson.JSONDecoder().decode(result)
except:
logger.warn('Could not parse data from url: ' + url)
return
try:
self.info_summary = data['album']['wiki']['summary']
except Exception:
logger.debug('No album summary found from: ' + url)
self.info_summary = None
try:
self.info_content = data['album']['wiki']['content']
except Exception:
logger.debug('No album infomation found from: ' + url)
self.info_content = None
try:
image_url = data['album']['image'][-1]['#text']
except Exception:
logger.debug('No album image link found on url: ' + url)
image_url = None
thumb_url = self._get_thumb_url(data)
data = lastfm.request_lastfm("album.getinfo", artist=dbartist['ArtistName'], album=dbartist['AlbumTitle'], api_key=lastfm_apikey)
if not data:
return
try:
self.info_summary = data['album']['wiki']['summary']
except Exception:
logger.debug('No album summary found')
self.info_summary = None
try:
self.info_content = data['album']['wiki']['content']
except Exception:
logger.debug('No album infomation found')
self.info_content = None
try:
image_url = data['album']['image'][-1]['#text']
except Exception:
logger.debug('No album image link found')
image_url = None
thumb_url = self._get_thumb_url(data)
if not thumb_url:
logger.debug('No album thumbnail image found')
if not thumb_url:
logger.debug('No album thumbnail image found on url: ' + url)
#Save the content & summary to the database no matter what if we've opened up the url
if self.id_type == 'artist':
controlValueDict = {"ArtistID": self.id}
@@ -392,139 +317,128 @@ class Cache(object):
newValueDict = {"Summary": self.info_summary,
"Content": self.info_content,
"LastUpdated": helpers.today()}
myDB.upsert("descriptions", newValueDict, controlValueDict)
# Save the image URL to the database
if image_url:
if self.id_type == 'artist':
myDB.action('UPDATE artists SET ArtworkURL=? WHERE ArtistID=?', [image_url, self.id])
else:
myDB.action('UPDATE albums SET ArtworkURL=? WHERE AlbumID=?', [image_url, self.id])
# Save the thumb URL to the database
if thumb_url:
if self.id_type == 'artist':
myDB.action('UPDATE artists SET ThumbURL=? WHERE ArtistID=?', [thumb_url, self.id])
else:
myDB.action('UPDATE albums SET ThumbURL=? WHERE AlbumID=?', [thumb_url, self.id])
# Should we grab the artwork here if we're just grabbing thumbs or info?? Probably not since the files can be quite big
if image_url and self.query_type == 'artwork':
try:
artwork = urllib2.urlopen(image_url, timeout=20).read()
except Exception, e:
logger.error('Unable to open url "' + image_url + '". Error: ' + str(e))
artwork = None
artwork = request.request_content(image_url, timeout=20)
if artwork:
# Make sure the artwork dir exists:
if not os.path.isdir(self.path_to_art_cache):
try:
os.makedirs(self.path_to_art_cache)
except Exception, e:
logger.error('Unable to create artwork cache dir. Error: ' + str(e))
logger.error('Unable to create artwork cache dir. Error: %s', e)
self.artwork_errors = True
self.artwork_url = image_url
#Delete the old stuff
for artwork_file in self.artwork_files:
try:
os.remove(artwork_file)
except:
logger.error('Error deleting file from the cache: ' + artwork_file)
logger.error('Error deleting file from the cache: %s', artwork_file)
ext = os.path.splitext(image_url)[1]
artwork_path = os.path.join(self.path_to_art_cache, self.id + '.' + helpers.today() + ext)
try:
f = open(artwork_path, 'wb')
f.write(artwork)
f.close()
except Exception, e:
logger.error('Unable to write to the cache dir: ' + str(e))
logger.error('Unable to write to the cache dir: %s', e)
self.artwork_errors = True
self.artwork_url = image_url
# Grab the thumbnail as well if we're getting the full artwork (as long as it's missing/outdated
if thumb_url and self.query_type in ['thumb','artwork'] and not (self.thumb_files and self._is_current(self.thumb_files[0])):
try:
artwork = urllib2.urlopen(thumb_url, timeout=20).read()
except Exception, e:
logger.error('Unable to open url "' + thumb_url + '". Error: ' + str(e))
artwork = None
artwork = request.request_content(thumb_url, timeout=20)
if artwork:
# Make sure the artwork dir exists:
if not os.path.isdir(self.path_to_art_cache):
try:
os.makedirs(self.path_to_art_cache)
except Exception, e:
logger.error('Unable to create artwork cache dir. Error: ' + str(e))
logger.error('Unable to create artwork cache dir. Error: %s' + e)
self.thumb_errors = True
self.thumb_url = thumb_url
#Delete the old stuff
for thumb_file in self.thumb_files:
try:
os.remove(thumb_file)
except:
logger.error('Error deleting file from the cache: ' + thumb_file)
logger.error('Error deleting file from the cache: %s', thumb_file)
ext = os.path.splitext(image_url)[1]
thumb_path = os.path.join(self.path_to_art_cache, 'T_' + self.id + '.' + helpers.today() + ext)
try:
f = open(thumb_path, 'wb')
f.write(artwork)
f.close()
except Exception, e:
logger.error('Unable to write to the cache dir: ' + str(e))
logger.error('Unable to write to the cache dir: %s', e)
self.thumb_errors = True
self.thumb_url = image_url
def getArtwork(ArtistID=None, AlbumID=None):
c = Cache()
artwork_path = c.get_artwork_from_cache(ArtistID, AlbumID)
if not artwork_path:
return None
if artwork_path.startswith('http://'):
return artwork_path
else:
artwork_file = os.path.basename(artwork_path)
return "cache/artwork/" + artwork_file
def getThumb(ArtistID=None, AlbumID=None):
c = Cache()
artwork_path = c.get_thumb_from_cache(ArtistID, AlbumID)
if not artwork_path:
return None
if artwork_path.startswith('http://'):
return artwork_path
else:
thumbnail_file = os.path.basename(artwork_path)
return "cache/artwork/" + thumbnail_file
def getInfo(ArtistID=None, AlbumID=None):
c = Cache()
info_dict = c.get_info_from_cache(ArtistID, AlbumID)
return info_dict
def getImageLinks(ArtistID=None, AlbumID=None):
c = Cache()
image_links = c.get_image_links(ArtistID, AlbumID)
return image_links

View File

@@ -73,14 +73,14 @@ class DBConnection:
break
except sqlite3.OperationalError, e:
if "unable to open database file" in e.message or "database is locked" in e.message:
logger.warn('Database Error: %s' % e)
logger.warn('Database Error: %s', e)
attempt += 1
time.sleep(1)
else:
logger.error('Database error: %s' % e)
logger.error('Database error: %s', e)
raise
except sqlite3.DatabaseError, e:
logger.error('Fatal Error executing %s :: %s' % (query, e))
logger.error('Fatal Error executing %s :: %s', query, e)
raise
return sqlResult

View File

@@ -13,17 +13,25 @@
# You should have received a copy of the GNU General Public License
# along with Headphones. If not, see <http://www.gnu.org/licenses/>.
import os, time
from operator import itemgetter
import os
import re
import time
import shutil
import datetime
import re, shutil
import headphones
def multikeysort(items, columns):
from operator import itemgetter
from beets.mediafile import MediaFile, FileTypeError, UnreadableFileError
# Modified from https://github.com/Verrus/beets-plugin-featInTitle
RE_FEATURING = re.compile(r"[fF]t\.|[fF]eaturing|[fF]eat\.|\b[wW]ith\b|&|vs\.")
RE_CD_ALBUM = re.compile(r"\(?((CD|disc)\s*[0-9]+)\)?", re.I)
RE_CD = re.compile(r"^(CD|dics)\s*[0-9]+$", re.I)
def multikeysort(items, columns):
comparers = [ ((itemgetter(col[1:].strip()), -1) if col.startswith('-') else (itemgetter(col.strip()), 1)) for col in columns]
def comparer(left, right):
for fn, mult in comparers:
result = cmp(fn(left), fn(right))
@@ -31,22 +39,22 @@ def multikeysort(items, columns):
return mult * result
else:
return 0
return sorted(items, cmp=comparer)
def checked(variable):
if variable:
return 'Checked'
else:
return ''
def radio(variable, pos):
if variable == pos:
return 'Checked'
else:
return ''
def latinToAscii(unicrap):
"""
From couch potato
@@ -87,7 +95,7 @@ def latinToAscii(unicrap):
else:
r += str(i)
return r
def convert_milliseconds(ms):
seconds = ms/1000
@@ -98,7 +106,7 @@ def convert_milliseconds(ms):
minutes = time.strftime("%M:%S", gmtime)
return minutes
def convert_seconds(s):
gmtime = time.gmtime(s)
@@ -108,30 +116,30 @@ def convert_seconds(s):
minutes = time.strftime("%M:%S", gmtime)
return minutes
def today():
today = datetime.date.today()
yyyymmdd = datetime.date.isoformat(today)
return yyyymmdd
def now():
now = datetime.datetime.now()
return now.strftime("%Y-%m-%d %H:%M:%S")
def get_age(date):
try:
split_date = date.split('-')
except:
return False
try:
days_old = int(split_date[0])*365 + int(split_date[1])*30 + int(split_date[2])
except IndexError:
days_old = False
return days_old
def bytes_to_mb(bytes):
mb = int(bytes)/1048576
@@ -142,7 +150,7 @@ def mb_to_bytes(mb_str):
result = re.search('^(\d+(?:\.\d+)?)\s?(?:mb)?', mb_str, flags=re.I)
if result:
return int(float(result.group(1))*1048576)
def piratesize(size):
split = size.split(" ")
factor = float(split[0])
@@ -157,48 +165,155 @@ def piratesize(size):
size = factor
else:
size = 0
return size
def replace_all(text, dic):
if not text:
return ''
for i, j in dic.iteritems():
text = text.replace(i, j)
return text
def replace_illegal_chars(string, type="file"):
if type == "file":
string = re.sub('[\?"*:|<>/]', '_', string)
if type == "folder":
string = re.sub('[:\?<>"|]', '_', string)
return string
def cleanName(string):
pass1 = latinToAscii(string).lower()
out_string = re.sub('[\.\-\/\!\@\#\$\%\^\&\*\(\)\+\-\"\'\,\;\:\[\]\{\}\<\>\=\_]', '', pass1).encode('utf-8')
return out_string
def cleanTitle(title):
title = re.sub('[\.\-\/\_]', ' ', title).lower()
# Strip out extra whitespace
title = ' '.join(title.split())
title = title.title()
return title
def split_path(f):
"""
Split a path into components, starting with the drive letter (if any). Given
a path, os.path.join(*split_path(f)) should be path equal to f.
"""
components = []
drive, path = os.path.splitdrive(f)
# Stip the folder from the path, iterate until nothing is left
while True:
path, folder = os.path.split(path)
if folder:
components.append(folder)
else:
if path:
components.append(path)
break
# Append the drive (if any)
if drive:
components.append(drive)
# Reverse components
components.reverse()
# Done
return components
def expand_subfolders(f):
"""
Try to expand a given folder and search for subfolders containing media
files. This should work for discographies indexed per album in the same
root, possibly with folders per CD (if any).
This algorithm will return nothing if the result is only one folder. In this
case, normal post processing will be better.
"""
from headphones import logger
# Find all folders with media files in them
media_folders = []
for root, dirs, files in os.walk(f):
for file in files:
extension = os.path.splitext(file)[1].lower()[1:]
if extension in headphones.MEDIA_FORMATS:
if root not in media_folders:
media_folders.append(root)
# Stop here if nothing found
if len(media_folders) == 0:
return
# Split into path components
media_folders = [ split_path(media_folder) for media_folder in media_folders ]
# Correct folder endings such as CD1 etc.
for index, media_folder in enumerate(media_folders):
if RE_CD.match(media_folder[-1]):
media_folders[index] = media_folders[index][:-1]
# Verify the result by computing path depth relative to root.
path_depths = [ len(media_folder) for media_folder in media_folders ]
difference = max(path_depths) - min(path_depths)
if difference > 0:
logger.info("Found %d media folders, but depth difference between lowest and deepest media folder is %d (expected zero). If this is a discography or a collection of albums, make sure albums are per folder" % (len(media_folders), difference))
# While already failed, advice the user what he could try. We assume the
# directory may contain separate CD's and maybe some extra's. The
# structure may look like X albums at same depth, and (one or more)
# extra folders with a higher depth.
extra_media_folders = [ media_folder[:min(path_depths)] for media_folder in media_folders if len(media_folder) > min(path_depths) ]
extra_media_folders = list(set([ os.path.join(*media_folder) for media_folder in extra_media_folders ]))
logger.info("Please look at the following folder(s), since they cause the depth difference: %s" % extra_media_folders)
return
# Convert back to paths and remove duplicates, which may be there after
# correcting the paths
media_folders = list(set([ os.path.join(*media_folder) for media_folder in media_folders ]))
# Don't return a result if the number of subfolders is one. In this case,
# this algorithm will not improve processing and will likely interfere
# with other attempts such as MusicBrainz release group IDs.
if len(media_folders) == 1:
logger.debug("Did not expand subfolder, as it resulted in one folder.")
return
logger.debug("Expanded subfolders in folder: " % media_folders)
return media_folders
def extract_data(s):
s = s.replace('_', ' ')
#headphones default format
pattern = re.compile(r'(?P<name>.*?)\s\-\s(?P<album>.*?)\s\[(?P<year>.*?)\]', re.VERBOSE)
match = pattern.match(s)
if match:
name = match.group("name")
album = match.group("album")
year = match.group("year")
return (name, album, year)
#newzbin default format
pattern = re.compile(r'(?P<name>.*?)\s\-\s(?P<album>.*?)\s\((?P<year>\d+?\))', re.VERBOSE)
match = pattern.match(s)
@@ -207,9 +322,118 @@ def extract_data(s):
album = match.group("album")
year = match.group("year")
return (name, album, year)
#Gonna take a guess on this one - might be enough to search on mb
pat = re.compile(r"(?P<name>.*?)\s*-\s*(?P<album>[^\[(-]*)")
match = pat.match(s)
if match:
name = match.group("name")
album = match.group("album")
year = None
return (name, album, year)
else:
return (None, None, None)
def extract_metadata(f):
"""
Scan all files in the given directory and decide on an artist, album and
year based on the metadata. A decision is based on the number of different
artists, albums and years found in the media files.
"""
from headphones import logger
# Walk directory and scan all media files
results = []
count = 0
for root, dirs, files in os.walk(f):
for file in files:
# Count the number of potential media files
extension = os.path.splitext(file)[1].lower()[1:]
if extension in headphones.MEDIA_FORMATS:
count += 1
# Try to read the file info
try:
media_file = MediaFile(os.path.join(root, file))
except FileTypeError, UnreadableFileError:
# Probably not a media file
continue
# Append metadata to file
artist = media_file.albumartist or media_file.artist
album = media_file.album
year = media_file.year
if artist and album and year:
results.append((artist.lower(), album.lower(), year))
# Verify results
if len(results) == 0:
logger.info("No metadata in media files found, ignoring")
return (None, None, None)
# Require that some percentage of files have tags
count_ratio = 0.75
if count < (count_ratio * len(results)):
logger.info("Counted %d media files, but only %d have tags, ignoring" % (count, len(results)))
return (None, None, None)
# Count distinct values
artists = list(set([ x[0] for x in results ]))
albums = list(set([ x[1] for x in results ]))
years = list(set([ x[2] for x in results ]))
# Remove things such as CD2 from album names
if len(albums) > 1:
new_albums = list(albums)
# Replace occurences of e.g. CD1
for index, album in enumerate(new_albums):
if RE_CD_ALBUM.search(album):
old_album = new_albums[index]
new_albums[index] = RE_CD_ALBUM.sub("", album).strip()
logger.debug("Stripped albumd number identifier: %s -> %s" % (old_album, new_albums[index]))
# Remove duplicates
new_albums = list(set(new_albums))
# Safety check: if nothing has merged, then ignore the work. This can
# happen if only one CD of a multi part CD is processed.
if len(new_albums) < len(albums):
albums = new_albums
# All files have the same metadata, so it's trivial
if len(artists) == 1 and len(albums) == 1:
return (artists[0], albums[0], years[0])
# (Lots of) different artists. Could be a featuring album, so test for this.
if len(artists) > 1 and len(albums) == 1:
split_artists = [ RE_FEATURING.split(artist) for artist in artists ]
featurings = [ len(split_artist) - 1 for split_artist in split_artists ]
logger.info("Album seem to feature %d different artists" % sum(featurings))
if sum(featurings) > 0:
# Find the artist of which the least splits have been generated.
# Ideally, this should be 0, which should be the album artist
# itself.
artist = split_artists[featurings.index(min(featurings))][0]
# Done
return (artist, albums[0], years[0])
# Not sure what to do here.
logger.info("Found %d artists, %d albums and %d years in metadata, so ignoring" % (len(artists), len(albums), len(years)))
logger.debug("Artists: %s, Albums: %s, Years: %s" % (artists, albums, years))
return (None, None, None)
def extract_logline(s):
# Default log format
pattern = re.compile(r'(?P<timestamp>.*?)\s\-\s(?P<level>.*?)\s*\:\:\s(?P<thread>.*?)\s\:\s(?P<message>.*)', re.VERBOSE)
@@ -222,26 +446,26 @@ def extract_logline(s):
return (timestamp, level, thread, message)
else:
return None
def extract_song_data(s):
#headphones default format
music_dir = headphones.MUSIC_DIR
folder_format = headphones.FOLDER_FORMAT
file_format = headphones.FILE_FORMAT
full_format = os.path.join(headphones.MUSIC_DIR)
pattern = re.compile(r'(?P<name>.*?)\s\-\s(?P<album>.*?)\s\[(?P<year>.*?)\]', re.VERBOSE)
match = pattern.match(s)
if match:
name = match.group("name")
album = match.group("album")
year = match.group("year")
return (name, album, year)
else:
logger.info("Couldn't parse " + s + " into a valid default format")
logger.info("Couldn't parse %s into a valid default format", s)
#newzbin default format
pattern = re.compile(r'(?P<name>.*?)\s\-\s(?P<album>.*?)\s\((?P<year>\d+?\))', re.VERBOSE)
match = pattern.match(s)
@@ -251,18 +475,18 @@ def extract_song_data(s):
year = match.group("year")
return (name, album, year)
else:
logger.info("Couldn't parse " + s + " into a valid Newbin format")
logger.info("Couldn't parse %s into a valid Newbin format", s)
return (name, album, year)
def smartMove(src, dest, delete=True):
from headphones import logger
source_dir = os.path.dirname(src)
filename = os.path.basename(src)
if os.path.isfile(os.path.join(dest, filename)):
logger.info('Destination file exists: %s' % os.path.join(dest, filename).decode(headphones.SYS_ENCODING, 'replace'))
logger.info('Destination file exists: %s', os.path.join(dest, filename).decode(headphones.SYS_ENCODING, 'replace'))
title = os.path.splitext(filename)[0]
ext = os.path.splitext(filename)[1]
i = 1
@@ -271,12 +495,12 @@ def smartMove(src, dest, delete=True):
if os.path.isfile(os.path.join(dest, newfile)):
i += 1
else:
logger.info('Renaming to %s' % newfile)
try:
logger.info('Renaming to %s', newfile)
try:
os.rename(src, os.path.join(source_dir, newfile))
filename = newfile
except Exception, e:
logger.warn('Error renaming %s: %s' % (src.decode(headphones.SYS_ENCODING, 'replace'), str(e).decode(headphones.SYS_ENCODING, 'replace')))
logger.warn('Error renaming %s: %s', src.decode(headphones.SYS_ENCODING, 'replace'), e)
break
try:
@@ -286,7 +510,7 @@ def smartMove(src, dest, delete=True):
shutil.copy(os.path.join(source_dir, filename), os.path.join(dest, filename))
return True
except Exception, e:
logger.warn('Error moving file %s: %s' % (filename.decode(headphones.SYS_ENCODING, 'replace'), str(e).decode(headphones.SYS_ENCODING, 'replace')))
logger.warn('Error moving file %s: %s', filename.decode(headphones.SYS_ENCODING, 'replace'), e)
#########################
#Sab renaming functions #
@@ -305,13 +529,13 @@ def sab_sanitize_foldername(name):
"""
CH_ILLEGAL = r'\/<>?*|"'
CH_LEGAL = r'++{}!@#`'
FL_ILLEGAL = CH_ILLEGAL + ':\x92"'
FL_LEGAL = CH_LEGAL + "-''"
uFL_ILLEGAL = FL_ILLEGAL.decode('latin-1')
uFL_LEGAL = FL_LEGAL.decode('latin-1')
if not name:
return name
if isinstance(name, unicode):
@@ -340,9 +564,9 @@ def sab_sanitize_foldername(name):
return name
def split_string(mystring):
def split_string(mystring, splitvar=','):
mylist = []
for each_word in mystring.split(','):
for each_word in mystring.split(splitvar):
mylist.append(each_word.strip())
return mylist
@@ -352,12 +576,12 @@ def create_https_certificates(ssl_cert, ssl_key):
Create self-signed HTTPS certificares and store in paths 'ssl_cert' and 'ssl_key'
"""
from headphones import logger
try:
from OpenSSL import crypto #@UnresolvedImport
from lib.certgen import createKeyPair, createCertRequest, createCertificate, TYPE_RSA, serial #@UnresolvedImport
from OpenSSL import crypto
from lib.certgen import createKeyPair, createCertRequest, createCertificate, TYPE_RSA, serial
except:
logger.warn(u"pyopenssl module missing, please install for https access")
logger.warn("pyOpenSSL module missing, please install to enable HTTPS")
return False
# Create the CA Certificate
@@ -374,8 +598,8 @@ def create_https_certificates(ssl_cert, ssl_key):
try:
open(ssl_key, 'w').write(crypto.dump_privatekey(crypto.FILETYPE_PEM, pkey))
open(ssl_cert, 'w').write(crypto.dump_certificate(crypto.FILETYPE_PEM, cert))
except:
logger.error(u"Error creating SSL key and certificate")
except Exception, e:
logger.error("Error creating SSL key and certificate: %s", e)
return False
return True

View File

@@ -17,10 +17,10 @@ from lib.pyItunes import *
import time
import threading
import os
from lib.beets.mediafile import MediaFile
from beets.mediafile import MediaFile
import headphones
from headphones import logger, helpers, db, mb, albumart, lastfm
from headphones import logger, helpers, db, mb, lastfm
blacklisted_special_artist_names = ['[anonymous]','[data]','[no artist]','[traditional]','[unknown]','Various Artists']
blacklisted_special_artists = ['f731ccc4-e22a-43af-a747-64213329e088','33cf029c-63b0-41a0-9855-be2a3665fb3b',\
@@ -28,11 +28,11 @@ blacklisted_special_artists = ['f731ccc4-e22a-43af-a747-64213329e088','33cf029c-
'9be7f096-97ec-4615-8957-8d40b5dcbc41','125ec42a-7229-4250-afc5-e057484327fe',\
'89ad4ac3-39f7-470e-963a-56509c546377']
def is_exists(artistid):
myDB = db.DBConnection()
# See if the artist is already in the database
artistlist = myDB.select('SELECT ArtistID, ArtistName from artists WHERE ArtistID=?', [artistid])
@@ -46,10 +46,10 @@ def is_exists(artistid):
def artistlist_to_mbids(artistlist, forced=False):
for artist in artistlist:
if not artist and not (artist == ' '):
continue
# If adding artists through Manage New Artists, they're coming through as non-unicode (utf-8?)
# and screwing everything up
@@ -59,50 +59,50 @@ def artistlist_to_mbids(artistlist, forced=False):
except:
logger.warn("Unable to convert artist to unicode so cannot do a database lookup")
continue
results = mb.findArtist(artist, limit=1)
if not results:
logger.info('No results found for: %s' % artist)
continue
try:
try:
artistid = results[0]['id']
except IndexError:
logger.info('MusicBrainz query turned up no matches for: %s' % artist)
continue
# Check if it's blacklisted/various artists (only check if it's not forced, e.g. through library scan auto-add.)
# Forced example = Adding an artist from Manage New Artists
myDB = db.DBConnection()
if not forced:
bl_artist = myDB.action('SELECT * FROM blacklist WHERE ArtistID=?', [artistid]).fetchone()
if bl_artist or artistid in blacklisted_special_artists:
logger.info("Artist ID for '%s' is either blacklisted or Various Artists. To add artist, you must do it manually (Artist ID: %s)" % (artist, artistid))
continue
# Add to database if it doesn't exist
if not is_exists(artistid):
addArtisttoDB(artistid)
# Just update the tracks if it does
else:
havetracks = len(myDB.select('SELECT TrackTitle from tracks WHERE ArtistID=?', [artistid])) + len(myDB.select('SELECT TrackTitle from have WHERE ArtistName like ?', [artist]))
myDB.action('UPDATE artists SET HaveTracks=? WHERE ArtistID=?', [havetracks, artistid])
# Delete it from the New Artists if the request came from there
if forced:
myDB.action('DELETE from newartists WHERE ArtistName=?', [artist])
# Update the similar artist tag cloud:
logger.info('Updating artist information from Last.fm')
try:
lastfm.getSimilar()
except Exception, e:
logger.warn('Failed to update arist information from Last.fm: %s' % e)
def addArtistIDListToDB(artistidlist):
# Used to add a list of artist IDs to the database in a single thread
logger.debug("Importer: Adding artist ids %s" % artistidlist)
@@ -110,20 +110,20 @@ def addArtistIDListToDB(artistidlist):
addArtisttoDB(artistid)
def addArtisttoDB(artistid, extrasonly=False, forcefull=False):
# Putting this here to get around the circular import. We're using this to update thumbnails for artist/albums
from headphones import cache
# Can't add various artists - throws an error from MB
if artistid in blacklisted_special_artists:
logger.warn('Cannot import blocked special purpose artist with id' + artistid)
return
# We'll use this to see if we should update the 'LastUpdated' time stamp
errors = False
myDB = db.DBConnection()
# Delete from blacklist if it's on there
myDB.action('DELETE from blacklist WHERE ArtistID=?', [artistid])
@@ -135,7 +135,7 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False):
# Don't replace a known artist name with an "Artist ID" placeholder
dbartist = myDB.action('SELECT * FROM artists WHERE ArtistID=?', [artistid]).fetchone()
# Only modify the Include Extras stuff if it's a new artist. We need it early so we know what to fetch
if not dbartist:
newValueDict = {"ArtistName": "Artist ID: %s" % (artistid),
@@ -146,9 +146,9 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False):
newValueDict = {"Status": "Loading"}
myDB.upsert("artists", newValueDict, controlValueDict)
artist = mb.getArtist(artistid, extrasonly)
if artist and artist.get('artist_name') in blacklisted_special_artist_names:
logger.warn('Cannot import blocked special purpose artist: %s' % artist.get('artist_name'))
myDB.action('DELETE from artists WHERE ArtistID=?', [artistid])
@@ -166,12 +166,12 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False):
newValueDict = {"Status": "Active"}
myDB.upsert("artists", newValueDict, controlValueDict)
return
if artist['artist_name'].startswith('The '):
sortname = artist['artist_name'][4:]
else:
sortname = artist['artist_name']
logger.info(u"Now adding/updating: " + artist['artist_name'])
controlValueDict = {"ArtistID": artistid}
@@ -179,13 +179,13 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False):
"ArtistSortName": sortname,
"DateAdded": helpers.today(),
"Status": "Loading"}
myDB.upsert("artists", newValueDict, controlValueDict)
# See if we need to grab extras. Artist specific extras take precedence over global option
# Global options are set when adding a new artist
myDB = db.DBConnection()
try:
db_artist = myDB.action('SELECT IncludeExtras, Extras from artists WHERE ArtistID=?', [artistid]).fetchone()
includeExtras = db_artist['IncludeExtras']
@@ -207,16 +207,16 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False):
myDB.action("DELETE FROM allalbums WHERE AlbumID=?", [items['AlbumID']])
myDB.action("DELETE FROM tracks WHERE AlbumID=?", [items['AlbumID']])
myDB.action("DELETE FROM alltracks WHERE AlbumID=?", [items['AlbumID']])
myDB.action('DELETE from releases WHERE ReleaseGroupID=?', [items['AlbumID']])
logger.info("[%s] Removing all references to release group %s to reflect MusicBrainz" % (artist['artist_name'], items['AlbumID']))
force_repackage = 1
else:
logger.info("[%s] Error pulling data from MusicBrainz: Maintaining dB" % artist['artist_name'])
# Then search for releases within releasegroups, if releases don't exist, then remove from allalbums/alltracks
logger.info("[%s] There was either an error pulling data from MusicBrainz or there might not be any releases for this category" % artist['artist_name'])
# Then search for releases within releasegroups, if releases don't exist, then remove from allalbums/alltracks
album_searches = []
for rg in artist['releasegroups']:
al_title = rg['title']
today = helpers.today()
rgid = rg['id']
@@ -235,7 +235,7 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False):
if check_release_date:
if check_release_date[0] is None:
logger.info("[%s] Now updating: %s (No Release Date)" % (artist['artist_name'], rg['title']))
new_releases = mb.get_new_releases(rgid,includeExtras,True)
new_releases = mb.get_new_releases(rgid,includeExtras,True)
else:
if len(check_release_date[0]) == 10:
release_date = check_release_date[0]
@@ -264,7 +264,7 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False):
else:
logger.info("[%s] Now adding/updating: %s (Comprehensive Force)" % (artist['artist_name'], rg['title']))
new_releases = mb.get_new_releases(rgid,includeExtras,forcefull)
#What this does is adds new releases per artist to the allalbums + alltracks databases
#new_releases = mb.get_new_releases(rgid,includeExtras)
#print al_title
@@ -276,7 +276,8 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False):
myDB.action("DELETE from allalbums WHERE ReleaseID=?", [rg['id']])
myDB.action("DELETE from tracks WHERE ReleaseID=?", [rg['id']])
myDB.action("DELETE from alltracks WHERE ReleaseID=?", [rg['id']])
# This will be used later to build a hybrid release
myDB.action('DELETE from releases WHERE ReleaseGroupID=?', [rg['id']])
# This will be used later to build a hybrid release
fullreleaselist = []
#Search for releases within a release group
find_hybrid_releases = myDB.action("SELECT * from allalbums WHERE AlbumID=?", [rg['id']])
@@ -305,7 +306,7 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False):
#'url': hybrid_tracks['TrackURL'],
'duration': hybrid_tracks['TrackDuration']
})
totalTracks += 1
totalTracks += 1
newValueDict['ReleaseID'] = hybrid_release_id
newValueDict['Tracks'] = hybrid_track_array
fullreleaselist.append(newValueDict)
@@ -321,7 +322,7 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False):
errors = True
logger.warn('[%s] Unable to get hybrid release information for %s: %s' % (artist['artist_name'],rg['title'],e))
continue
# Use the ReleaseGroupID as the ReleaseID for the hybrid release to differentiate it
# We can then use the condition WHERE ReleaseID == ReleaseGroupID to select it
# The hybrid won't have a country or a format
@@ -335,13 +336,13 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False):
"ReleaseDate": hybridrelease['ReleaseDate'],
"Type": rg['type']
}
myDB.upsert("allalbums", newValueDict, controlValueDict)
for track in hybridrelease['Tracks']:
cleanname = helpers.cleanName(artist['artist_name'] + ' ' + rg['title'] + ' ' + track['title'])
controlValueDict = {"TrackID": track['id'],
"ReleaseID": rg['id']}
@@ -355,25 +356,25 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False):
"TrackNumber": track['number'],
"CleanName": cleanname
}
match = myDB.action('SELECT Location, BitRate, Format from have WHERE CleanName=?', [cleanname]).fetchone()
if not match:
match = myDB.action('SELECT Location, BitRate, Format from have WHERE ArtistName LIKE ? AND AlbumTitle LIKE ? AND TrackTitle LIKE ?', [artist['artist_name'], rg['title'], track['title']]).fetchone()
#if not match:
#match = myDB.action('SELECT Location, BitRate, Format from have WHERE TrackID=?', [track['id']]).fetchone()
#match = myDB.action('SELECT Location, BitRate, Format from have WHERE TrackID=?', [track['id']]).fetchone()
if match:
newValueDict['Location'] = match['Location']
newValueDict['BitRate'] = match['BitRate']
newValueDict['Format'] = match['Format']
#myDB.action('UPDATE have SET Matched="True" WHERE Location=?', [match['Location']])
myDB.action('UPDATE have SET Matched=? WHERE Location=?', (rg['id'], match['Location']))
myDB.upsert("alltracks", newValueDict, controlValueDict)
# Delete matched tracks from the have table
#myDB.action('DELETE from have WHERE Matched="True"')
# If there's no release in the main albums tables, add the default (hybrid)
# If there is a release, check the ReleaseID against the AlbumID to see if they differ (user updated)
# check if the album already exists
@@ -382,7 +383,7 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False):
releaseid = rg['id']
else:
releaseid = rg_exists['ReleaseID']
album = myDB.action('SELECT * from allalbums WHERE ReleaseID=?', [releaseid]).fetchone()
controlValueDict = {"AlbumID": rg['id']}
@@ -397,13 +398,13 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False):
"ReleaseCountry": album['ReleaseCountry'],
"ReleaseFormat": album['ReleaseFormat']
}
if not rg_exists:
today = helpers.today()
newValueDict['DateAdded']= today
if headphones.AUTOWANT_ALL:
newValueDict['Status'] = "Wanted"
elif album['ReleaseDate'] > today and headphones.AUTOWANT_UPCOMING:
@@ -414,16 +415,16 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False):
newValueDict['Status'] = "Wanted"
else:
newValueDict['Status'] = "Skipped"
myDB.upsert("albums", newValueDict, controlValueDict)
myDB.upsert("albums", newValueDict, controlValueDict)
tracks = myDB.action('SELECT * from alltracks WHERE ReleaseID=?', [releaseid]).fetchall()
# This is used to see how many tracks you have from an album - to mark it as downloaded. Default is 80%, can be set in config as ALBUM_COMPLETION_PCT
total_track_count = len(tracks)
for track in tracks:
controlValueDict = {"TrackID": track['TrackID'],
"AlbumID": rg['id']}
@@ -440,13 +441,13 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False):
"Format": track['Format'],
"BitRate": track['BitRate']
}
myDB.upsert("tracks", newValueDict, controlValueDict)
myDB.upsert("tracks", newValueDict, controlValueDict)
# Mark albums as downloaded if they have at least 80% (by default, configurable) of the album
have_track_count = len(myDB.select('SELECT * from tracks WHERE AlbumID=? AND Location IS NOT NULL', [rg['id']]))
marked_as_downloaded = False
if rg_exists:
if rg_exists['Status'] == 'Skipped' and ((have_track_count/float(total_track_count)) >= (headphones.ALBUM_COMPLETION_PCT/100.0)):
myDB.action('UPDATE albums SET Status=? WHERE AlbumID=?', ['Downloaded', rg['id']])
@@ -458,11 +459,12 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False):
logger.info(u"[%s] Seeing if we need album art for %s" % (artist['artist_name'], rg['title']))
cache.getThumb(AlbumID=rg['id'])
#start a search for the album if it's new, hasn't been marked as downloaded and autowant_all is selected:
if not rg_exists and not marked_as_downloaded and headphones.AUTOWANT_ALL:
from headphones import searcher
searcher.searchforalbum(albumid=rg['id'])
# Start a search for the album if it's new, hasn't been marked as
# downloaded and autowant_all is selected. This search is deferred,
# in case the search failes and the rest of the import will halt.
if not rg_exists and not marked_as_downloaded and headphones.AUTOWANT_ALL:
album_searches.append(rg['id'])
else:
if skip_log == 0:
logger.info(u"[%s] No new releases, so no changes made to %s" % (artist['artist_name'], rg['title']))
@@ -473,7 +475,7 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False):
havetracks = len(myDB.select('SELECT TrackTitle from tracks WHERE ArtistID=? AND Location IS NOT NULL', [artistid])) + len(myDB.select('SELECT TrackTitle from have WHERE ArtistName like ? AND Matched = "Failed"', [artist['artist_name']]))
controlValueDict = {"ArtistID": artistid}
if latestalbum:
newValueDict = {"Status": "Active",
"LatestAlbum": latestalbum['AlbumTitle'],
@@ -485,24 +487,31 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False):
newValueDict = {"Status": "Active",
"TotalTracks": totaltracks,
"HaveTracks": havetracks}
if not errors:
newValueDict['LastUpdated'] = helpers.now()
myDB.upsert("artists", newValueDict, controlValueDict)
logger.info(u"Seeing if we need album art for: %s" % artist['artist_name'])
cache.getThumb(ArtistID=artistid)
if errors:
logger.info("[%s] Finished updating artist: %s but with errors, so not marking it as updated in the database" % (artist['artist_name'], artist['artist_name']))
else:
myDB.action('DELETE FROM newartists WHERE ArtistName = ?', [artist['artist_name']])
logger.info(u"Updating complete for: %s" % artist['artist_name'])
# Start searching for newly added albums
if album_searches:
from headphones import searcher
logger.info("Start searching for %d albums.", len(album_searches))
for album_search in album_searches:
searcher.searchforalbum(albumid=album_search)
def addReleaseById(rid):
myDB = db.DBConnection()
rgid = None
@@ -519,45 +528,45 @@ def addReleaseById(rid):
try:
release_dict = mb.getRelease(rid)
except Exception, e:
logger.info('Unable to get release information for Release: ' + str(rid) + " " + str(e))
logger.info('Unable to get release information for Release %s: %s', rid, e)
return
if not release_dict:
logger.info('Unable to get release information for Release: ' + str(rid) + " no dict")
logger.info('Unable to get release information for Release %s: no dict', rid)
return
rgid = release_dict['rgid']
artistid = release_dict['artist_id']
#we don't want to make more calls to MB here unless we have to, could be happening quite a lot
rg_exists = myDB.select("SELECT * from albums WHERE AlbumID=?", [rgid])
#make sure the artist exists since I don't know what happens later if it doesn't
artist_exists = myDB.select("SELECT * from artists WHERE ArtistID=?", [artistid])
if not artist_exists and release_dict:
if release_dict['artist_name'].startswith('The '):
sortname = release_dict['artist_name'][4:]
else:
sortname = release_dict['artist_name']
logger.info(u"Now manually adding: " + release_dict['artist_name'] + " - with status Paused")
controlValueDict = {"ArtistID": release_dict['artist_id']}
newValueDict = {"ArtistName": release_dict['artist_name'],
"ArtistSortName": sortname,
"DateAdded": helpers.today(),
"Status": "Paused"}
if headphones.INCLUDE_EXTRAS:
newValueDict['IncludeExtras'] = 1
newValueDict['Extras'] = headphones.EXTRAS
myDB.upsert("artists", newValueDict, controlValueDict)
elif not artist_exists and not release_dict:
logger.error("Artist does not exist in the database and did not get a valid response from MB. Skipping release.")
return
if not rg_exists and release_dict: #it should never be the case that we have an rg and not the artist
#but if it is this will fail
logger.info(u"Now adding-by-id album (" + release_dict['title'] + ") from id: " + rgid)
@@ -572,16 +581,16 @@ def addReleaseById(rid):
"Status": 'Wanted',
"Type": release_dict['rg_type']
}
myDB.upsert("albums", newValueDict, controlValueDict)
#keep a local cache of these so that external programs that are adding releasesByID don't hammer MB
myDB.action('INSERT INTO releases VALUES( ?, ?)', [rid, release_dict['rgid']])
for track in release_dict['tracks']:
cleanname = helpers.cleanName(release_dict['artist_name'] + ' ' + release_dict['rg_title'] + ' ' + track['title'])
controlValueDict = {"TrackID": track['id'],
"AlbumID": rgid}
newValueDict = {"ArtistID": release_dict['artist_id'],
@@ -593,23 +602,23 @@ def addReleaseById(rid):
"TrackNumber": track['number'],
"CleanName": cleanname
}
match = myDB.action('SELECT Location, BitRate, Format from have WHERE CleanName=?', [cleanname]).fetchone()
if not match:
match = myDB.action('SELECT Location, BitRate, Format from have WHERE ArtistName LIKE ? AND AlbumTitle LIKE ? AND TrackTitle LIKE ?', [release_dict['artist_name'], release_dict['rg_title'], track['title']]).fetchone()
#if not match:
#match = myDB.action('SELECT Location, BitRate, Format from have WHERE TrackID=?', [track['id']]).fetchone()
if match:
newValueDict['Location'] = match['Location']
newValueDict['BitRate'] = match['BitRate']
newValueDict['Format'] = match['Format']
#myDB.action('DELETE from have WHERE Location=?', [match['Location']])
myDB.upsert("tracks", newValueDict, controlValueDict)
#start a search for the album
import searcher
searcher.searchforalbum(rgid, False)
@@ -655,34 +664,34 @@ def getHybridRelease(fullreleaselist):
if len(fullreleaselist) == 0:
raise Exception("getHybridRelease was called with an empty fullreleaselist")
sortable_release_list = []
for release in fullreleaselist:
formats = {
'2xVinyl': '2',
'Vinyl': '2',
'CD': '0',
'Cassette': '3',
'Cassette': '3',
'2xCD': '1',
'Digital Media': '0'
}
countries = {
'US': '0',
'GB': '1',
'JP': '2',
}
try:
format = int(formats[release['Format']])
except:
format = 3
try:
country = int(countries[release['Country']])
country = int(countries[release['Country']])
except:
country = 3
release_dict = {
'hasasin': bool(release['AlbumASIN']),
'asin': release['AlbumASIN'],
@@ -695,11 +704,11 @@ def getHybridRelease(fullreleaselist):
}
sortable_release_list.append(release_dict)
#necessary to make dates that miss the month and/or day show up after full dates
def getSortableReleaseDate(releaseDate):
if releaseDate == None:
return 'None';#change this value to change the sorting behaviour of none, returning 'None' will put it at the top
return 'None';#change this value to change the sorting behaviour of none, returning 'None' will put it at the top
#which was normal behaviour for pre-ngs versions
if releaseDate.count('-') == 2:
return releaseDate
@@ -713,12 +722,12 @@ def getHybridRelease(fullreleaselist):
average_tracks = sum(x['trackscount'] for x in sortable_release_list) / float(len(sortable_release_list))
for item in sortable_release_list:
item['trackscount_delta'] = abs(average_tracks - item['trackscount'])
a = helpers.multikeysort(sortable_release_list, ['-hasasin', 'country', 'format', 'trackscount_delta'])
release_dict = {'ReleaseDate' : sortable_release_list[0]['releasedate'],
'Tracks' : a[0]['tracks'],
'AlbumASIN' : a[0]['asin']
}
return release_dict

View File

@@ -13,237 +13,141 @@
# You should have received a copy of the GNU General Public License
# along with Headphones. If not, see <http://www.gnu.org/licenses/>.
import urllib, urllib2
from xml.dom import minidom
from collections import defaultdict
import random
import time
import headphones
from headphones import db, logger
api_key = '395e6ec6bb557382fc41fde867bce66f'
from headphones import db, logger, request
from collections import defaultdict
ENTRY_POINT = 'http://ws.audioscrobbler.com/2.0/'
API_KEY = '395e6ec6bb557382fc41fde867bce66f'
def request_lastfm(method, **kwargs):
"""
Call a Last.FM API method. Automatically sets the method and API key. Method
will return the result if no error occured.
By default, this method will request the JSON format, since it is lighter
than XML.
"""
# Prepare request
kwargs["method"] = method
kwargs.setdefault("api_key", API_KEY)
kwargs.setdefault("format", "json")
# Send request
logger.debug("Calling Last.FM method: %s", method)
data = request.request_json(ENTRY_POINT, timeout=20, params=kwargs)
# Parse response and check for errors.
if not data:
logger.error("Error calling Last.FM method: %s", method)
return
if "error" in data:
logger.debug("Last.FM returned an error: %s", data["message"])
return
return data
def getSimilar():
myDB = db.DBConnection()
results = myDB.select('SELECT ArtistID from artists ORDER BY HaveTracks DESC')
logger.info("Fetching similar artists from Last.FM for tag cloud")
artistlist = []
for result in results[:12]:
url = 'http://ws.audioscrobbler.com/2.0/?method=artist.getsimilar&mbid=%s&api_key=%s' % (result['ArtistID'], api_key)
try:
data = urllib2.urlopen(url, timeout=20).read()
except:
time.sleep(1)
continue
if not data or len(data) < 200:
continue
try:
d = minidom.parseString(data)
except:
logger.debug("Could not parse similar artist data from last.fm")
node = d.documentElement
artists = d.getElementsByTagName("artist")
for artist in artists:
namenode = artist.getElementsByTagName("name")[0].childNodes
mbidnode = artist.getElementsByTagName("mbid")[0].childNodes
for node in namenode:
artist_name = node.data
for node in mbidnode:
artist_mbid = node.data
try:
data = request_lastfm("artist.getsimilar", mbid=result['ArtistId'])
time.sleep(10)
if data and "similarartists" in data:
artists = data["similarartists"]["artist"]
for artist in artists:
try:
artist_mbid = artist["mbid"]
artist_name = artist["name"]
except TypeError:
continue
if not any(artist_mbid in x for x in results):
artistlist.append((artist_name, artist_mbid))
except:
continue
# Add new artists to tag cloud
logger.debug("Fetched %d artists from Last.FM", len(artistlist))
count = defaultdict(int)
for artist, mbid in artistlist:
count[artist, mbid] += 1
items = count.items()
top_list = sorted(items, key=lambda x: x[1], reverse=True)[:25]
random.shuffle(top_list)
myDB.action('''DELETE from lastfmcloud''')
for tuple in top_list:
artist_name, artist_mbid = tuple[0]
count = tuple[1]
myDB.action('INSERT INTO lastfmcloud VALUES( ?, ?, ?)', [artist_name, artist_mbid, count])
def getArtists():
items = count.items()
top_list = sorted(items, key=lambda x: x[1], reverse=True)[:25]
random.shuffle(top_list)
myDB.action("DELETE from lastfmcloud")
for item in top_list:
artist_name, artist_mbid = item[0]
count = item[1]
myDB.action('INSERT INTO lastfmcloud VALUES( ?, ?, ?)', [artist_name, artist_mbid, count])
logger.debug("Inserted %d artists into Last.FM tag cloud", len(top_list))
def getArtists():
myDB = db.DBConnection()
results = myDB.select('SELECT ArtistID from artists')
if not headphones.LASTFM_USERNAME:
logger.warn("Last.FM username not set, not importing artists.")
return
else:
username = headphones.LASTFM_USERNAME
url = 'http://ws.audioscrobbler.com/2.0/?method=library.getartists&limit=10000&api_key=%s&user=%s' % (api_key, username)
data = urllib2.urlopen(url, timeout=20).read()
try:
d = minidom.parseString(data)
except:
logger.error("Could not parse artist list from last.fm data")
return
artists = d.getElementsByTagName("artist")
artistlist = []
for artist in artists:
mbidnode = artist.getElementsByTagName("mbid")[0].childNodes
for node in mbidnode:
artist_mbid = node.data
try:
logger.info("Fetching artists from Last.FM for username: %s", headphones.LASTFM_USERNAME)
data = request_lastfm("library.getartists", limit=10000, user=headphones.LASTFM_USERNAME)
if data and "artists" in data:
artistlist = []
artists = data["artists"]["artist"]
logger.debug("Fetched %d artists from Last.FM", len(artists))
for artist in artists:
artist_mbid = artist["mbid"]
if not any(artist_mbid in x for x in results):
artistlist.append(artist_mbid)
except:
continue
from headphones import importer
for artistid in artistlist:
importer.addArtisttoDB(artistid)
from headphones import importer
for artistid in artistlist:
importer.addArtisttoDB(artistid)
logger.info("Imported %d new artists from Last.FM", len(artistlist))
def getTagTopArtists(tag, limit=50):
myDB = db.DBConnection()
results = myDB.select('SELECT ArtistID from artists')
url = 'http://ws.audioscrobbler.com/2.0/?method=tag.gettopartists&limit=%s&tag=%s&api_key=%s' % (limit, tag, api_key)
data = urllib2.urlopen(url, timeout=20).read()
logger.info("Fetching top artists from Last.FM for tag: %s", tag)
data = request_lastfm("tag.gettopartists", limit=limit, tag=tag)
try:
d = minidom.parseString(data)
except:
logger.error("Could not parse artist list from last.fm data")
return
if data and "topartists" in data:
artistlist = []
artists = data["topartists"]["artist"]
logger.debug("Fetched %d artists from Last.FM", len(artists))
artists = d.getElementsByTagName("artist")
for artist in artists:
artist_mbid = artist["mbid"]
artistlist = []
for artist in artists:
mbidnode = artist.getElementsByTagName("mbid")[0].childNodes
for node in mbidnode:
artist_mbid = node.data
try:
if not any(artist_mbid in x for x in results):
artistlist.append(artist_mbid)
except:
continue
from headphones import importer
from headphones import importer
for artistid in artistlist:
importer.addArtisttoDB(artistid)
for artistid in artistlist:
importer.addArtisttoDB(artistid)
def getAlbumDescription(rgid, artist, album):
myDB = db.DBConnection()
result = myDB.select('SELECT Summary from descriptions WHERE ReleaseGroupID=?', [rgid])
if result:
return
params = { "method": 'album.getInfo',
"api_key": api_key,
"artist": artist.encode('utf-8'),
"album": album.encode('utf-8')
}
searchURL = 'http://ws.audioscrobbler.com/2.0/?' + urllib.urlencode(params)
data = urllib2.urlopen(searchURL, timeout=20).read()
if data == '<?xml version="1.0" encoding="utf-8"?><lfm status="failed"><error code="6">Album not found</error></lfm>':
return
try:
d = minidom.parseString(data)
albuminfo = d.getElementsByTagName("album")
for item in albuminfo:
summarynode = item.getElementsByTagName("summary")[0].childNodes
contentnode = item.getElementsByTagName("content")[0].childNodes
for node in summarynode:
summary = node.data
for node in contentnode:
content = node.data
controlValueDict = {'ReleaseGroupID': rgid}
newValueDict = {'Summary': summary,
'Content': content}
myDB.upsert("descriptions", newValueDict, controlValueDict)
except:
return
def getAlbumDescriptionOld(rgid, releaselist):
"""
This was a dumb way to do it - going to just use artist & album name but keeping this here
because I may use it to fetch and cache album art
"""
myDB = db.DBConnection()
result = myDB.select('SELECT Summary from descriptions WHERE ReleaseGroupID=?', [rgid])
if result:
return
for release in releaselist:
mbid = release['releaseid']
url = 'http://ws.audioscrobbler.com/2.0/?method=album.getInfo&mbid=%s&api_key=%s' % (mbid, api_key)
data = urllib.urlopen(url).read()
if data == '<?xml version="1.0" encoding="utf-8"?><lfm status="failed"><error code="6">Album not found</error></lfm>':
continue
try:
d = minidom.parseString(data)
albuminfo = d.getElementsByTagName("album")
for item in albuminfo:
summarynode = item.getElementsByTagName("summary")[0].childNodes
contentnode = item.getElementsByTagName("content")[0].childNodes
for node in summarynode:
summary = node.data
for node in contentnode:
content = node.data
controlValueDict = {'ReleaseGroupID': rgid}
newValueDict = {'ReleaseID': mbid,
'Summary': summary,
'Content': content}
myDB.upsert("descriptions", newValueDict, controlValueDict)
break
except:
continue
logger.debug("Added %d new artists from Last.FM", len(artistlist))

View File

@@ -16,10 +16,10 @@
import os
import glob
from lib.beets.mediafile import MediaFile
from beets.mediafile import MediaFile
import headphones
from headphones import db, logger, helpers, importer
from headphones import db, logger, helpers, importer, lastfm
# You can scan a single directory and append it to the current library by specifying append=True, ArtistID & ArtistName
def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None, cron=False):
@@ -329,6 +329,8 @@ def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None, cron=Fal
myDB.action('UPDATE artists SET HaveTracks=? WHERE ArtistID=?', [havetracks, ArtistID])
update_album_status()
if not append:
lastfm.getSimilar()
logger.info('Library scan complete')
#ADDED THIS SECTION TO MARK ALBUMS AS DOWNLOADED IF ARTISTS ARE ADDED EN MASSE BEFORE LIBRARY IS SCANNED

View File

@@ -14,83 +14,133 @@
# along with Headphones. If not, see <http://www.gnu.org/licenses/>.
import os
import threading
import sys
import logging
import traceback
import threading
import headphones
from logging import handlers
import headphones
from headphones import helpers
MAX_SIZE = 1000000 # 1mb
# These settings are for file logging only
FILENAME = 'headphones.log'
MAX_SIZE = 1000000 # 1 MB
MAX_FILES = 5
# Headphones logger
logger = logging.getLogger('headphones')
# Simple rotating log handler that uses RotatingFileHandler
class RotatingLogger(object):
class LogListHandler(logging.Handler):
"""
Log handler for Web UI.
"""
def __init__(self, filename, max_size, max_files):
self.filename = filename
self.max_size = max_size
self.max_files = max_files
def initLogger(self, verbose=1):
l = logging.getLogger('headphones')
l.setLevel(logging.DEBUG)
self.filename = os.path.join(headphones.LOG_DIR, self.filename)
filehandler = handlers.RotatingFileHandler(self.filename, maxBytes=self.max_size, backupCount=self.max_files)
filehandler.setLevel(logging.DEBUG)
fileformatter = logging.Formatter('%(asctime)s - %(levelname)-7s :: %(message)s', '%d-%b-%Y %H:%M:%S')
filehandler.setFormatter(fileformatter)
l.addHandler(filehandler)
if verbose:
consolehandler = logging.StreamHandler()
if verbose == 1:
consolehandler.setLevel(logging.INFO)
if verbose == 2:
consolehandler.setLevel(logging.DEBUG)
consoleformatter = logging.Formatter('%(asctime)s - %(levelname)s :: %(message)s', '%d-%b-%Y %H:%M:%S')
consolehandler.setFormatter(consoleformatter)
l.addHandler(consolehandler)
def log(self, message, level):
def emit(self, record):
message = self.format(record)
message = message.replace("\n", "<br />")
logger = logging.getLogger('headphones')
threadname = threading.currentThread().getName()
if level != 'DEBUG':
headphones.LOG_LIST.insert(0, (helpers.now(), message, level, threadname))
message = threadname + ' : ' + message
headphones.LOG_LIST.insert(0, (helpers.now(), message, record.levelname, record.threadName))
if level == 'DEBUG':
logger.debug(message)
elif level == 'INFO':
logger.info(message)
elif level == 'WARNING':
logger.warn(message)
else:
logger.error(message)
def initLogger(verbose=1):
"""
Setup logging for Headphones. It uses the logger instance with the name
'headphones'. Three log handlers are added:
headphones_log = RotatingLogger('headphones.log', MAX_SIZE, MAX_FILES)
* RotatingFileHandler: for the file headphones.log
* LogListHandler: for Web UI
* StreamHandler: for console (if verbose > 0)
"""
def debug(message):
headphones_log.log(message, level='DEBUG')
# Configure the logger to accept all messages
logger.propagate = False
logger.setLevel(logging.DEBUG)
def info(message):
headphones_log.log(message, level='INFO')
def warn(message):
headphones_log.log(message, level='WARNING')
def error(message):
headphones_log.log(message, level='ERROR')
# Setup file logger
filename = os.path.join(headphones.LOG_DIR, FILENAME)
file_formatter = logging.Formatter('%(asctime)s - %(levelname)-7s :: %(threadName)s : %(message)s', '%d-%b-%Y %H:%M:%S')
file_handler = handlers.RotatingFileHandler(filename, maxBytes=MAX_SIZE, backupCount=MAX_FILES)
file_handler.setLevel(logging.DEBUG)
file_handler.setFormatter(file_formatter)
logger.addHandler(file_handler)
# Add list logger
loglist_handler = LogListHandler()
loglist_handler.setLevel(logging.INFO)
logger.addHandler(loglist_handler)
# Setup console logger
if verbose:
console_formatter = logging.Formatter('%(asctime)s - %(levelname)s :: %(threadName)s : %(message)s', '%d-%b-%Y %H:%M:%S')
console_handler = logging.StreamHandler()
console_handler.setFormatter(console_formatter)
if verbose == 1:
console_handler.setLevel(logging.INFO)
elif verbose == 2:
console_handler.setLevel(logging.DEBUG)
logger.addHandler(console_handler)
# Install exception hooks
initHooks()
def initHooks(global_exceptions=True, thread_exceptions=True, pass_original=True):
"""
This method installs exception catching mechanisms. Any exception caught
will pass through the exception hook, and will be logged to the logger as
an error. Additionally, a traceback is provided.
This is very useful for crashing threads and any other bugs, that may not
be exposed when running as daemon.
The default exception hook is still considered, if pass_original is True.
"""
def excepthook(*exception_info):
# We should always catch this to prevent loops!
try:
message = "".join(traceback.format_exception(*exception_info))
logger.error("Uncaught exception: %s", message)
except:
pass
# Original excepthook
if pass_original:
sys.__excepthook__(*exception_info)
# Global exception hook
if global_exceptions:
sys.excepthook = excepthook
# Thread exception hook
if thread_exceptions:
old_init = threading.Thread.__init__
def new_init(self, *args, **kwargs):
old_init(self, *args, **kwargs)
old_run = self.run
def new_run(*args, **kwargs):
try:
old_run(*args, **kwargs)
except (KeyboardInterrupt, SystemExit):
raise
except:
excepthook(*sys.exc_info())
self.run = new_run
# Monkey patch the run() by monkey patching the __init__ method
threading.Thread.__init__ = new_init
# Expose logger methods
info = logger.info
warn = logger.warn
error = logger.error
debug = logger.debug
warning = logger.warning
exception = logger.exception

View File

@@ -14,11 +14,9 @@
# along with Headphones. If not, see <http://www.gnu.org/licenses/>.
import re
import urllib, urllib2
from xml.dom import minidom
import htmlentitydefs
from headphones import logger
from headphones import logger, request
def getLyrics(artist, song):
@@ -26,22 +24,14 @@ def getLyrics(artist, song):
"song": song.encode('utf-8'),
"fmt": 'xml'
}
searchURL = 'http://lyrics.wikia.com/api.php?' + urllib.urlencode(params)
url = 'http://lyrics.wikia.com/api.php'
data = request.request_minidom(url, params=params)
try:
data = urllib2.urlopen(searchURL, timeout=20).read()
except Exception, e:
logger.warn('Error opening: %s. Error: %s' % (searchURL, e))
if not data:
return
try:
parseddata = minidom.parseString(data)
except Exception, e:
logger.warn('Error parsing data from url: %s. Error: %s' % (searchURL, e))
return
url = parseddata.getElementsByTagName("url")
url = data.getElementsByTagName("url")
if url:
lyricsurl = url[0].firstChild.nodeValue
@@ -49,12 +39,12 @@ def getLyrics(artist, song):
logger.info('No lyrics found for %s - %s' % (artist, song))
return
try:
lyricspage = urllib.urlopen(lyricsurl).read()
except Exception, e:
logger.warn('Error fetching lyrics from: %s. Error: %s' % (lyricsurl, e))
lyricspage = request.request_content(lyricsurl)
if not lyricspage:
logger.warn('Error fetching lyrics from: %s' % lyricsurl)
return
m = re.compile('''<div class='lyricbox'><div class='rtMatcher'>.*?</div>(.*?)<!--''').search(lyricspage)
if not m:

View File

@@ -67,7 +67,7 @@ def startmb():
else:
musicbrainzngs.hpauth(mbuser,mbpass)
logger.debug('Using the following server values:\nMBHost: %s ; MBPort: %i ; Sleep Interval: %i ' % (mbhost, mbport, sleepytime))
logger.debug('Using the following server values: MBHost: %s, MBPort: %i, Sleep Interval: %i', mbhost, mbport, sleepytime)
return True
@@ -223,7 +223,7 @@ def getArtist(artistid, extrasonly=False):
# Need to convert extras string from something like '2,5.6' to ['ep','live','remix']
extras = db_artist['Extras']
extras_list = ["single", "ep", "compilation", "soundtrack", "live", "remix", "spokenword", "audiobook"]
extras_list = ["single", "ep", "compilation", "soundtrack", "live", "remix", "spokenword", "audiobook", "other"]
includes = []
i = 1
@@ -373,7 +373,7 @@ def get_new_releases(rgid,includeExtras=False,forcefull=False):
logger.info("Removing all references to release %s to reflect MusicBrainz" % items['ReleaseID'])
force_repackage1 = 1
else:
logger.info("Error pulling data from MusicBrainz: Maintaining dB")
logger.info("There was either an error pulling data from MusicBrainz or there might not be any releases for this category")
num_new_releases = 0
@@ -566,7 +566,10 @@ def findAlbumID(artist=None, album=None):
results = None
try:
term = '"'+album+'" AND artist:"'+artist+'"'
if album and artist:
term = '"'+album+'" AND artist:"'+artist+'"'
else:
term = album
results = musicbrainzngs.search_release_groups(term,1).get('release-group-list')
except WebServiceError, e:
logger.warn('Attempt to query MusicBrainz for %s - %s failed (%s)' % (artist, album, str(e)))

View File

@@ -17,15 +17,11 @@ import os
import headphones
import shutil
import time
import multiprocessing
import subprocess
from headphones import logger
from lib.beets.mediafile import MediaFile
try:
import argparse
except ImportError:
import lib.argparse as argparse
from beets.mediafile import MediaFile
# xld
if headphones.ENCODER == 'xld':
@@ -41,33 +37,36 @@ def encode(albumPath):
global xldProfile
(xldProfile, xldFormat, xldBitrate) = getXldProfile.getXldProfile(headphones.XLDPROFILE)
if not xldFormat:
logger.error(u'Details for xld profile %s not found, files will not be re-encoded' % (xldProfile))
logger.error('Details for xld profile \'%s\' not found, files will not be re-encoded', xldProfile)
return None
tempDirEncode=os.path.join(albumPath,"temp")
musicFiles=[]
musicFinalFiles=[]
musicTempFiles=[]
encoder =""
if not os.path.exists(tempDirEncode):
encoder = ""
# Create temporary directory, but remove the old one first.
try:
if os.path.exists(tempDirEncode):
shutil.rmtree(tempDirEncode)
time.sleep(1)
os.mkdir(tempDirEncode)
else:
shutil.rmtree(tempDirEncode)
time.sleep(1)
os.mkdir(tempDirEncode)
except Exception, e:
logger.exception("Unable to create temporary directory")
return None
for r,d,f in os.walk(albumPath):
for music in f:
if any(music.lower().endswith('.' + x.lower()) for x in headphones.MEDIA_FORMATS):
if not XLD:
encoderFormat = headphones.ENCODEROUTPUTFORMAT.encode(headphones.SYS_ENCODING)
else:
xldMusicFile = os.path.join(r, music)
xldInfoMusic = MediaFile(xldMusicFile)
encoderFormat = xldFormat
if (headphones.ENCODERLOSSLESS):
ext = os.path.normpath(os.path.splitext(music)[1].lstrip(".")).lower()
if not XLD and ext == 'flac' or XLD and (ext != xldFormat and (xldInfoMusic.bitrate / 1000 > 400)):
@@ -75,7 +74,7 @@ def encode(albumPath):
musicTemp = os.path.normpath(os.path.splitext(music)[0] + '.' + encoderFormat)
musicTempFiles.append(os.path.join(tempDirEncode, musicTemp))
else:
logger.debug('%s is already encoded' % (music))
logger.debug('%s is already encoded', music)
else:
musicFiles.append(os.path.join(r, music))
musicTemp = os.path.normpath(os.path.splitext(music)[0] + '.' + encoderFormat)
@@ -85,7 +84,7 @@ def encode(albumPath):
encoder = headphones.ENCODER_PATH.encode(headphones.SYS_ENCODING)
else:
if XLD:
encoder = os.path.join('/Applications', 'xld')
encoder = os.path.join('/Applications', 'xld')
elif headphones.ENCODER =='lame':
if headphones.SYS_PLATFORM == "win32":
## NEED THE DEFAULT LAME INSTALL ON WIN!
@@ -100,46 +99,77 @@ def encode(albumPath):
i=0
encoder_failed = False
jobs = []
for music in musicFiles:
for music in musicFiles:
infoMusic=MediaFile(music)
encode = False
if XLD:
if xldBitrate and (infoMusic.bitrate / 1000 <= xldBitrate):
logger.info('%s has bitrate <= %skb, will not be re-encoded' % (music.decode(headphones.SYS_ENCODING, 'replace'), xldBitrate))
logger.info('%s has bitrate <= %skb, will not be re-encoded', music.decode(headphones.SYS_ENCODING, 'replace'), xldBitrate)
else:
encode = True
elif headphones.ENCODER == 'lame':
if not any(music.decode(headphones.SYS_ENCODING, 'replace').lower().endswith('.' + x) for x in ["mp3", "wav"]):
logger.warn(u'Lame cannot encode %s format for %s, use ffmpeg' % (os.path.splitext(music)[1].decode(headphones.SYS_ENCODING, 'replace'),music.decode(headphones.SYS_ENCODING, 'replace')))
logger.warn('Lame cannot encode %s format for %s, use ffmpeg', os.path.splitext(music)[1], music)
else:
if (music.decode(headphones.SYS_ENCODING, 'replace').lower().endswith('.mp3') and (int(infoMusic.bitrate / 1000) <= headphones.BITRATE)):
logger.info('%s has bitrate <= %skb, will not be re-encoded' % (music.decode(headphones.SYS_ENCODING, 'replace'),headphones.BITRATE))
logger.info('%s has bitrate <= %skb, will not be re-encoded', music, headphones.BITRATE)
else:
encode = True
else:
if headphones.ENCODEROUTPUTFORMAT=='ogg':
if music.decode(headphones.SYS_ENCODING, 'replace').lower().endswith('.ogg'):
logger.warn('Cannot re-encode .ogg %s' % (music.decode(headphones.SYS_ENCODING, 'replace')))
logger.warn('Cannot re-encode .ogg %s', music.decode(headphones.SYS_ENCODING, 'replace'))
else:
encode = True
elif (headphones.ENCODEROUTPUTFORMAT=='mp3' or headphones.ENCODEROUTPUTFORMAT=='m4a'):
if (music.decode(headphones.SYS_ENCODING, 'replace').lower().endswith('.'+headphones.ENCODEROUTPUTFORMAT) and (int(infoMusic.bitrate / 1000 ) <= headphones.BITRATE)):
logger.info('%s has bitrate <= %skb, will not be re-encoded' % (music.decode(headphones.SYS_ENCODING, 'replace'),headphones.BITRATE))
logger.info('%s has bitrate <= %skb, will not be re-encoded', music, headphones.BITRATE)
else:
encode = True
# encode
if encode:
if not command(encoder,music,musicTempFiles[i],albumPath):
encoder_failed = True
break
job = (encoder, music, musicTempFiles[i], albumPath)
jobs.append(job)
else:
musicFiles[i] = None
musicTempFiles[i] = None
i=i+1
# Encode music files
if len(jobs) > 0:
if headphones.ENCODER_MULTICORE:
if headphones.ENCODER_MULTICORE_COUNT == 0:
processes = multiprocessing.cpu_count()
else:
processes = headphones.ENCODER_MULTICORE_COUNT
logger.debug("Multi-core encoding enabled, %d processes", processes)
else:
processes = 1
# Use multiprocessing only if it's worth the overhead. and if it is
# enabled. If not, then use the old fashioned way.
if processes > 1:
pool = multiprocessing.Pool(processes=processes)
results = pool.map_async(command_map, jobs)
# No new processes will be created, so close it and wait for all
# processes to finish
pool.close()
pool.join()
# Retrieve the results
results = results.get()
else:
results = map(command_map, jobs)
# The results are either True or False, so determine if one is False
encoder_failed = not all(results)
musicFiles = filter(None, musicFiles)
musicTempFiles = filter(None, musicTempFiles)
@@ -148,7 +178,7 @@ def encode(albumPath):
for dest in musicTempFiles:
if not os.path.exists(dest):
encoder_failed = True
logger.error('Encoded file %s does not exist in the destination temp directory' % (dest.decode(headphones.SYS_ENCODING, 'replace')))
logger.error('Encoded file \'%s\' does not exist in the destination temp directory', dest)
# No errors, move from temp to parent
if not encoder_failed and musicTempFiles:
@@ -164,7 +194,7 @@ def encode(albumPath):
try:
shutil.move(dest, albumPath)
except Exception, e:
logger.error('Could not move %s to %s : %s' % (dest.decode(headphones.SYS_ENCODING, 'replace'), albumPath.decode(headphones.SYS_ENCODING, 'replace'), e))
logger.error('Could not move %s to %s: %s', dest, albumPath, e)
encoder_failed = True
break
i += 1
@@ -174,7 +204,7 @@ def encode(albumPath):
# Return with error if any encoding errors
if encoder_failed:
logger.error('One or more files failed to encode, check debuglog and ensure you have the latest version of %s installed' % (headphones.ENCODER))
logger.error('One or more files failed to encode, check debuglog and ensure you have the latest version of %s installed', headphones.ENCODER)
return None
time.sleep(1)
@@ -182,14 +212,24 @@ def encode(albumPath):
for music in f:
if any(music.lower().endswith('.' + x.lower()) for x in headphones.MEDIA_FORMATS):
musicFinalFiles.append(os.path.join(r, music))
if not musicTempFiles:
logger.info('Encoding for folder %s is not required' % (albumPath.decode(headphones.SYS_ENCODING, 'replace')))
return musicFinalFiles
def command(encoder,musicSource,musicDest,albumPath):
if not musicTempFiles:
logger.info('Encoding for folder \'%s\' is not required', albumPath)
return musicFinalFiles
def command_map(args):
"""
This method is used for the multiprocessing.map() method as a wrapper.
"""
try:
return command(*args)
except Exception, e:
logger.exception("Encoder exception, will return failed")
return False
def command(encoder, musicSource ,musicDest, albumPath):
cmd=[]
startMusicTime=time.time()
@@ -268,7 +308,7 @@ def command(encoder,musicSource,musicDest,albumPath):
logger.debug(out)
encoded = False
else:
logger.info('%s encoded in %s' % (musicSource.decode(headphones.SYS_ENCODING, 'replace'),getTimeEncode(startMusicTime)))
logger.info('%s encoded in %s', musicSource, getTimeEncode(startMusicTime))
encoded = True
return encoded

View File

@@ -13,28 +13,107 @@
# You should have received a copy of the GNU General Public License
# along with Headphones. If not, see <http://www.gnu.org/licenses/>.
from headphones import logger, helpers, common
from headphones.exceptions import ex
import base64
import cherrypy
import urllib
import urllib2
import headphones
from httplib import HTTPSConnection
from urllib import urlencode
import simplejson
import os.path
import subprocess
import lib.simplejson as simplejson
from xml.dom import minidom
import gntp.notifier
try:
from urlparse import parse_qsl #@UnusedImport
except:
from cgi import parse_qsl #@Reimport
from xml.dom import minidom
from httplib import HTTPSConnection
from urllib import urlencode
import lib.oauth2 as oauth
import lib.pythontwitter as twitter
from headphones import logger, helpers, common, request
from headphones.exceptions import ex
try:
from urlparse import parse_qsl
except:
from cgi import parse_qsl
class GROWL:
def __init__(self):
self.enabled = headphones.GROWL_ENABLED
self.host = headphones.GROWL_HOST
self.password = headphones.GROWL_PASSWORD
def conf(self, options):
return cherrypy.config['config'].get('Growl', options)
def notify(self, message, event):
if not self.enabled:
return
# Split host and port
if self.host == "":
host, port = "localhost", 23053
if ":" in self.host:
host, port = self.host.split(':', 1)
port = int(port)
else:
host, port = self.host, 23053
# If password is empty, assume none
if self.password == "":
password = None
else:
password = self.password
# Register notification
growl = gntp.notifier.GrowlNotifier(
applicationName='Headphones',
notifications=['New Event'],
defaultNotifications=['New Event'],
hostname=host,
port=port,
password=password
)
try:
growl.register()
except gntp.notifier.errors.NetworkError:
logger.info(u'Growl notification failed: network error')
return
except gntp.notifier.errors.AuthError:
logger.info(u'Growl notification failed: authentication error')
return
# Send it, including an image
image_file = os.path.join(str(headphones.PROG_DIR), 'data/images/headphoneslogo.png')
image = open(image_file, 'rb').read()
try:
growl.notify(
noteType='New Event',
title=event,
description=message,
icon=image
)
except gntp.notifier.errors.NetworkError:
logger.info(u'Growl notification failed: network error')
return
logger.info(u"Growl notifications sent.")
def updateLibrary(self):
#For uniformity reasons not removed
return
def test(self, host, password):
self.enabled = True
self.host = host
self.password = password
self.notify('ZOMG Lazors Pewpewpew!', 'Test Message')
class PROWL:
keys = []
@@ -44,7 +123,6 @@ class PROWL:
self.enabled = headphones.PROWL_ENABLED
self.keys = headphones.PROWL_KEYS
self.priority = headphones.PROWL_PRIORITY
pass
def conf(self, options):
return cherrypy.config['config'].get('Prowl', options)
@@ -93,66 +171,34 @@ class PROWL:
class XBMC:
def __init__(self):
self.hosts = headphones.XBMC_HOST
self.username = headphones.XBMC_USERNAME
self.password = headphones.XBMC_PASSWORD
def _sendhttp(self, host, command):
username = self.username
password = self.password
url_command = urllib.urlencode(command)
url = host + '/xbmcCmds/xbmcHttp/?' + url_command
req = urllib2.Request(url)
if password:
base64string = base64.encodestring('%s:%s' % (username, password)).replace('\n', '')
req.add_header("Authorization", "Basic %s" % base64string)
logger.info('XBMC url: %s' % url)
try:
handle = urllib2.urlopen(req)
except Exception, e:
logger.warn('Error opening XBMC url: %s' % e)
return
response = handle.read().decode(headphones.SYS_ENCODING)
return response
if self.password:
return request.request_content(url, auth=(self.username, self.password))
else:
return request.request_content(url)
def _sendjson(self, host, method, params={}):
data = [{'id': 0, 'jsonrpc': '2.0', 'method': method, 'params': params}]
data = simplejson.JSONEncoder().encode(data)
headers = {'Content-Type': 'application/json'}
url = host + '/jsonrpc'
content = {'Content-Type': 'application/json', 'Content-Length': len(data)}
if self.password:
response = request.request_json(req, method="POST", data=simplejson.dumps(data), headers=headers, auth=(self.username, self.password))
else:
response = request.request_json(req, method="POST", data=simplejson.dumps(data), headers=headers)
req = urllib2.Request(host+'/jsonrpc', data, content)
if self.username and self.password:
base64string = base64.encodestring('%s:%s' % (self.username, self.password)).replace('\n', '')
req.add_header("Authorization", "Basic %s" % base64string)
try:
handle = urllib2.urlopen(req)
except Exception, e:
logger.warn('Error opening XBMC url: %s' % e)
return
response = simplejson.JSONDecoder().decode(handle.read())
try:
if response:
return response[0]['result']
except:
logger.warn('XBMC returned error: %s' % response[0]['error'])
return
def update(self):
# From what I read you can't update the music library on a per directory or per path basis
# so need to update the whole thing
@@ -192,6 +238,51 @@ class XBMC:
except:
logger.warn('Error sending notification request to XBMC')
class LMS:
#Class for updating a Logitech Media Server
def __init__(self):
self.hosts = headphones.LMS_HOST
def _sendjson(self, host):
data = {'id': 1, 'method': 'slim.request', 'params': ["",["rescan"]]} #Had a lot of trouble with simplejson, but this works.
data = simplejson.JSONEncoder().encode(data)
content = {'Content-Type': 'application/json', 'Content-Length': len(data)}
req = urllib2.Request(host+'/jsonrpc.js', data, content)
try:
handle = urllib2.urlopen(req)
except Exception, e:
logger.warn('Error opening LMS url: %s' % e)
return
response = simplejson.JSONDecoder().decode(handle.read())
server_result = simplejson.dumps(response)
try:
return response[0]['result']
except:
logger.warn('LMS returned error: %s' % response[0]['error'])
return
def update(self):
#Send the ["rescan"] command to an LMS server.
#Note that the command must be prefixed with the 'player' that the command is aimed at,
#But with this being a request for the server to update its library, the player is blank, so ""
hosts = [x.strip() for x in self.hosts.split(',')]
for host in hosts:
logger.info('Sending library rescan command to LMS @ '+host)
request = self._sendjson(host)
if not request:
logger.warn('Error sending rescan request to LMS')
class Plex:
@@ -288,21 +379,7 @@ class NMA:
self.priority = headphones.NMA_PRIORITY
def _send(self, data):
url_data = urllib.urlencode(data)
url = 'https://www.notifymyandroid.com/publicapi/notify'
req = urllib2.Request(url, url_data)
try:
handle = urllib2.urlopen(req)
except Exception, e:
logger.warn('Error opening NotifyMyAndroid url: ' % e)
return
response = handle.read().decode(headphones.SYS_ENCODING)
return response
return request.request_content('https://www.notifymyandroid.com/publicapi/notify', data=data)
def notify(self, artist=None, album=None, snatched_nzb=None):
@@ -462,6 +539,8 @@ class PUSHOVER:
self.enabled = headphones.PUSHOVER_ENABLED
self.keys = headphones.PUSHOVER_KEYS
self.priority = headphones.PUSHOVER_PRIORITY
if headphones.PUSHOVER_APITOKEN:
self.application_token = headphones.PUSHOVER_APITOKEN
pass
def conf(self, options):

View File

@@ -46,9 +46,9 @@ def sendNZB(nzb):
nzbGetRPC = xmlrpclib.ServerProxy(url)
try:
if nzbGetRPC.writelog("INFO", "headphones connected to drop of %s any moment now." % (nzb.name + ".nzb")):
logger.debug(u"Successful connected to NZBget")
logger.debug(u"Successfully connected to NZBget")
else:
logger.error(u"Successful connected to NZBget, but unable to send a message" % (nzb.name + ".nzb"))
logger.info(u"Successfully connected to NZBget, but unable to send a message" % (nzb.name + ".nzb"))
except httplib.socket.error:
logger.error(u"Please check your NZBget host and port (if it is running). NZBget is not responding to this combination")
@@ -74,7 +74,7 @@ def sendNZB(nzb):
nzbcontent64 = standard_b64encode(data)
logger.error(u"Sending NZB to NZBget")
logger.info(u"Sending NZB to NZBget")
logger.debug(u"URL: " + url)
if nzbGetRPC.append(nzb.name + ".nzb", headphones.NZBGET_CATEGORY, addToTop, nzbcontent64):

View File

@@ -19,15 +19,15 @@ import os
import time
import threading
import music_encoder
import urllib, shutil, re
import shutil, re
import uuid
from headphones import notifiers
import lib.beets as beets
from lib.beets import autotag
from lib.beets.mediafile import MediaFile
import beets
from beets import autotag
from beets.mediafile import MediaFile
import headphones
from headphones import db, albumart, librarysync, lyrics, logger, helpers
from headphones import db, albumart, librarysync, lyrics, logger, helpers, request
from headphones.helpers import sab_replace_dots, sab_replace_spaces
postprocessor_lock = threading.Lock()
@@ -356,13 +356,17 @@ def doPostProcessing(albumid, albumpath, release, tracks, downloaded_track_list,
if not downloaded_track_list:
return
artwork = None
album_art_path = albumart.getAlbumArt(albumid)
if headphones.EMBED_ALBUM_ART or headphones.ADD_ALBUM_ART:
if album_art_path:
artwork = urllib.urlopen(album_art_path).read()
if not album_art_path or len(artwork) < 100:
artwork = request.request_content(album_art_path)
else:
artwork = None
if not album_art_path or not artwork or len(artwork) < 100:
logger.info("No suitable album art found from Amazon. Checking Last.FM....")
artwork = albumart.getCachedArt(albumid)
if not artwork or len(artwork) < 100:
@@ -400,13 +404,19 @@ def doPostProcessing(albumid, albumpath, release, tracks, downloaded_track_list,
myDB = db.DBConnection()
myDB.action('UPDATE albums SET status = "Downloaded" WHERE AlbumID=?', [albumid])
myDB.action('UPDATE snatched SET status = "Processed" WHERE AlbumID=?', [albumid])
# Update the have tracks for all created dirs:
for albumpath in albumpaths:
librarysync.libraryScan(dir=albumpath, append=True, ArtistID=release['ArtistID'], ArtistName=release['ArtistName'])
logger.info(u'Post-processing for %s - %s complete' % (release['ArtistName'], release['AlbumTitle']))
if headphones.GROWL_ENABLED:
pushmessage = release['ArtistName'] + ' - ' + release['AlbumTitle']
logger.info(u"Growl request")
growl = notifiers.GROWL()
growl.notify(pushmessage,"Download and Postprocessing completed")
if headphones.PROWL_ENABLED:
pushmessage = release['ArtistName'] + ' - ' + release['AlbumTitle']
logger.info(u"Prowl request")
@@ -419,7 +429,11 @@ def doPostProcessing(albumid, albumpath, release, tracks, downloaded_track_list,
xbmc.update()
if headphones.XBMC_NOTIFY:
xbmc.notify(release['ArtistName'], release['AlbumTitle'], album_art_path)
if headphones.LMS_ENABLED:
lms = notifiers.LMS()
lms.update()
if headphones.PLEX_ENABLED:
plex = notifiers.Plex()
if headphones.PLEX_UPDATE:
@@ -446,7 +460,7 @@ def doPostProcessing(albumid, albumpath, release, tracks, downloaded_track_list,
pushmessage = release['ArtistName'] + ' - ' + release['AlbumTitle']
logger.info(u"Pushover request")
pushover = notifiers.PUSHOVER()
pushover.notify(pushmessage,"Download and Postprocessing completed")
pushover.notify(pushmessage,"Headphones")
if headphones.PUSHBULLET_ENABLED:
pushmessage = release['ArtistName'] + ' - ' + release['AlbumTitle']
@@ -471,8 +485,13 @@ def embedAlbumArt(artwork, downloaded_track_list):
continue
logger.debug('Adding album art to: %s' % downloaded_track)
f.art = artwork
f.save()
try:
f.art = artwork
f.save()
except Exception, e:
logger.error(u'Error ebedding album art to: %s. Error: %s' % (downloaded_track.decode(headphones.SYS_ENCODING, 'replace'), str(e)))
continue
def addAlbumArt(artwork, albumpath, release):
logger.info('Adding album art to folder')
@@ -490,9 +509,9 @@ def addAlbumArt(artwork, albumpath, release):
'$year': year
}
album_art_name = helpers.replace_all(headphones.ALBUM_ART_FORMAT.strip(), values).replace('/','_') + ".jpg"
album_art_name = helpers.replace_all(headphones.ALBUM_ART_FORMAT.strip(), values) + ".jpg"
album_art_name = album_art_name.replace('?','_').replace(':', '_').encode(headphones.SYS_ENCODING, 'replace')
album_art_name = helpers.replace_illegal_chars(album_art_name).encode(headphones.SYS_ENCODING, 'replace')
if headphones.FILE_UNDERSCORES:
album_art_name = album_art_name.replace(' ', '_')
@@ -500,11 +519,13 @@ def addAlbumArt(artwork, albumpath, release):
if album_art_name.startswith('.'):
album_art_name = album_art_name.replace(0, '_')
prev = os.umask(headphones.UMASK)
file = open(os.path.join(albumpath, album_art_name), 'wb')
file.write(artwork)
file.close()
os.umask(prev)
try:
file = open(os.path.join(albumpath, album_art_name), 'wb')
file.write(artwork)
file.close()
except Exception, e:
logger.error('Error saving album art: %s' % str(e))
return
def cleanupFiles(albumpath):
logger.info('Cleaning up files')
@@ -558,13 +579,15 @@ def moveFiles(albumpath, release, tracks):
}
folder = helpers.replace_all(headphones.FOLDER_FORMAT.strip(), values)
folder = folder.replace('./', '_/').replace(':','_').replace('?','_').replace('/.','/_').replace('<','_').replace('>','_').replace('|','_')
folder = helpers.replace_illegal_chars(folder, type="folder")
folder = folder.replace('./', '_/').replace('/.','/_')
if folder.endswith('.'):
folder = folder.replace(folder[len(folder)-1], '_')
folder = folder[:-1] + '_'
if folder.startswith('.'):
folder = folder.replace(0, '_')
folder = '_' + folder[1:]
# Grab our list of files early on so we can determine if we need to create
# the lossy_dest_dir, lossless_dest_dir, or both
@@ -602,18 +625,28 @@ def moveFiles(albumpath, release, tracks):
if make_lossless_folder:
# Only rename the folder if they use the album name, otherwise merge into existing folder
if os.path.exists(lossless_destination_path) and 'album' in last_folder.lower():
temp_folder = folder
i = 1
while True:
newfolder = temp_folder + '[%i]' % i
lossless_destination_path = os.path.normpath(os.path.join(headphones.LOSSLESS_DESTINATION_DIR, newfolder)).encode(headphones.SYS_ENCODING, 'replace')
if os.path.exists(lossless_destination_path):
i += 1
else:
temp_folder = newfolder
break
create_duplicate_folder = False
if headphones.REPLACE_EXISTING_FOLDERS:
try:
shutil.rmtree(lossless_destination_path)
except Exception, e:
logger.error("Error deleting existing folder: %s. Creating duplicate folder. Error: %s" % (lossless_destination_path.decode(headphones.SYS_ENCODING, 'replace'), str(e)))
create_duplicate_folder = True
if not headphones.REPLACE_EXISTING_FOLDERS or create_duplicate_folder:
temp_folder = folder
i = 1
while True:
newfolder = temp_folder + '[%i]' % i
lossless_destination_path = os.path.normpath(os.path.join(headphones.LOSSLESS_DESTINATION_DIR, newfolder)).encode(headphones.SYS_ENCODING, 'replace')
if os.path.exists(lossless_destination_path):
i += 1
else:
temp_folder = newfolder
break
if not os.path.exists(lossless_destination_path):
try:
@@ -625,18 +658,28 @@ def moveFiles(albumpath, release, tracks):
if make_lossy_folder:
if os.path.exists(lossy_destination_path) and 'album' in last_folder.lower():
create_duplicate_folder = False
if headphones.REPLACE_EXISTING_FOLDERS:
try:
shutil.rmtree(lossy_destination_path)
except Exception, e:
logger.error("Error deleting existing folder: %s. Creating duplicate folder. Error: %s" % (lossy_destination_path.decode(headphones.SYS_ENCODING, 'replace'), str(e)))
create_duplicate_folder = True
temp_folder = folder
if not headphones.REPLACE_EXISTING_FOLDERS or create_duplicate_folder:
temp_folder = folder
i = 1
while True:
newfolder = temp_folder + '[%i]' % i
lossy_destination_path = os.path.normpath(os.path.join(headphones.DESTINATION_DIR, newfolder)).encode(headphones.SYS_ENCODING, 'replace')
if os.path.exists(lossy_destination_path):
i += 1
else:
temp_folder = newfolder
break
i = 1
while True:
newfolder = temp_folder + '[%i]' % i
lossy_destination_path = os.path.normpath(os.path.join(headphones.DESTINATION_DIR, newfolder)).encode(headphones.SYS_ENCODING, 'replace')
if os.path.exists(lossy_destination_path):
i += 1
else:
temp_folder = newfolder
break
if not os.path.exists(lossy_destination_path):
try:
@@ -751,7 +794,7 @@ def correctMetadata(albumid, release, downloaded_track_list):
except Exception, e:
logger.error('Error getting recommendation: %s. Not writing metadata' % e)
return
if rec == 'RECOMMEND_NONE':
if str(rec) == 'recommendation.none':
logger.warn('No accurate album match found for %s, %s - not writing metadata' % (release['ArtistName'], release['AlbumTitle']))
return
@@ -762,7 +805,7 @@ def correctMetadata(albumid, release, downloaded_track_list):
return
logger.info('Beets recommendation for tagging items: %s' % rec)
# TODO: Handle extra_items & extra_tracks
autotag.apply_metadata(info, mapping)
@@ -798,7 +841,11 @@ def embedLyrics(downloaded_track_list):
if lyrics:
logger.debug('Adding lyrics to: %s' % downloaded_track.decode(headphones.SYS_ENCODING, 'replace'))
f.lyrics = metalyrics
f.save()
try:
f.save()
except:
logger.error('Cannot save lyrics to: %s. Skipping' % downloaded_track.decode(headphones.SYS_ENCODING, 'replace'))
continue
def renameFiles(albumpath, downloaded_track_list, release):
logger.info('Renaming files')
@@ -867,8 +914,7 @@ def renameFiles(albumpath, downloaded_track_list, release):
new_file_name = helpers.replace_all(headphones.FILE_FORMAT.strip(), values).replace('/','_') + ext
new_file_name = new_file_name.replace('?','_').replace(':', '_').encode(headphones.SYS_ENCODING, 'replace')
new_file_name = new_file_name.replace('*','_')
new_file_name = helpers.replace_illegal_chars(new_file_name).encode(headphones.SYS_ENCODING, 'replace')
if headphones.FILE_UNDERSCORES:
new_file_name = new_file_name.replace(' ', '_')
@@ -892,14 +938,14 @@ def renameFiles(albumpath, downloaded_track_list, release):
def updateFilePermissions(albumpaths):
for folder in albumpaths:
logger.info("Updating file permissions in " + folder.decode(headphones.SYS_ENCODING, 'replace'))
logger.info("Updating file permissions in %s", folder)
for r,d,f in os.walk(folder):
for files in f:
full_path = os.path.join(r, files)
try:
os.chmod(full_path, int(headphones.FILE_PERMISSIONS, 8))
except:
logger.error("Could not change permissions for file: " + full_path.decode(headphones.SYS_ENCODING, 'replace'))
logger.error("Could not change permissions for file: %s", full_path)
continue
def renameUnprocessedFolder(albumpath):
@@ -918,100 +964,168 @@ def renameUnprocessedFolder(albumpath):
os.rename(albumpath, new_folder_name)
return
def forcePostProcess():
def forcePostProcess(dir=None, expand_subfolders=True, album_dir=None):
download_dirs = []
if headphones.DOWNLOAD_DIR:
download_dirs.append(headphones.DOWNLOAD_DIR.encode(headphones.SYS_ENCODING, 'replace'))
if headphones.DOWNLOAD_TORRENT_DIR:
download_dirs.append(headphones.DOWNLOAD_TORRENT_DIR.encode(headphones.SYS_ENCODING, 'replace'))
if album_dir:
folders = [album_dir.encode(headphones.SYS_ENCODING, 'replace')]
# If DOWNLOAD_DIR and DOWNLOAD_TORRENT_DIR are the same, remove the duplicate to prevent us from trying to process the same folder twice.
download_dirs = list(set(download_dirs))
logger.info('Checking to see if there are any folders to process in download_dir(s): %s' % str(download_dirs).decode(headphones.SYS_ENCODING, 'replace'))
# Get a list of folders in the download_dir
folders = []
for download_dir in download_dirs:
if not os.path.isdir(download_dir):
logger.warn('Directory ' + download_dir.decode(headphones.SYS_ENCODING, 'replace') + ' does not exist. Skipping')
continue
for folder in os.listdir(download_dir):
path_to_folder = os.path.join(download_dir, folder)
if os.path.isdir(path_to_folder):
folders.append(path_to_folder)
if len(folders):
logger.info('Found %i folders to process' % len(folders))
else:
logger.info('Found no folders to process in: %s' % str(download_dirs).decode(headphones.SYS_ENCODING, 'replace'))
download_dirs = []
if dir:
download_dirs.append(dir.encode(headphones.SYS_ENCODING, 'replace'))
if headphones.DOWNLOAD_DIR and not dir:
download_dirs.append(headphones.DOWNLOAD_DIR.encode(headphones.SYS_ENCODING, 'replace'))
if headphones.DOWNLOAD_TORRENT_DIR and not dir:
download_dirs.append(headphones.DOWNLOAD_TORRENT_DIR.encode(headphones.SYS_ENCODING, 'replace'))
# If DOWNLOAD_DIR and DOWNLOAD_TORRENT_DIR are the same, remove the duplicate to prevent us from trying to process the same folder twice.
download_dirs = list(set(download_dirs))
logger.info('Checking to see if there are any folders to process in download_dir(s): %s', download_dirs)
# Get a list of folders in the download_dir
folders = []
for download_dir in download_dirs:
if not os.path.isdir(download_dir):
logger.warn('Directory %s does not exist. Skipping', download_dir)
continue
for folder in os.listdir(download_dir):
path_to_folder = os.path.join(download_dir, folder)
if os.path.isdir(path_to_folder):
subfolders = helpers.expand_subfolders(path_to_folder)
if expand_subfolders and subfolders is not None:
folders.extend(subfolders)
else:
folders.append(path_to_folder)
if len(folders):
logger.info('Found %i folders to process', len(folders))
else:
logger.info('Found no folders to process in: %s', download_dirs)
# Parse the folder names to get artist album info
myDB = db.DBConnection()
for folder in folders:
folder_basename = os.path.basename(folder).decode(headphones.SYS_ENCODING, 'replace')
logger.info('Processing: %s', folder_basename)
logger.info('Processing: %s' % folder_basename)
# First try to see if there's a match in the snatched table, then we'll try to parse the foldername
# TODO: Iterate through underscores -> spaces, spaces -> dots, underscores -> dots (this might be hit or miss since it assumes
# all spaces/underscores came from sab replacing values
# Attempt 1: First try to see if there's a match in the snatched table,
# then we'll try to parse the foldername.
# TODO: Iterate through underscores -> spaces, spaces -> dots,
# underscores -> dots (this might be hit or miss since it assumes all
# spaces/underscores came from sab replacing values
logger.debug('Attempting to find album in the snatched table')
snatched = myDB.action('SELECT AlbumID, Title, Kind, Status from snatched WHERE FolderName LIKE ?', [folder_basename]).fetchone()
if snatched:
if headphones.KEEP_TORRENT_FILES and snatched['Kind'] == 'torrent' and snatched['Status'] == 'Processed':
logger.info(folder_basename + ' is a torrent folder being preserved for seeding and has already been processed. Skipping.')
logger.info('%s is a torrent folder being preserved for seeding and has already been processed. Skipping.', folder_basename)
continue
else:
logger.info('Found a match in the database: %s. Verifying to make sure it is the correct album' % snatched['Title'])
logger.info('Found a match in the database: %s. Verifying to make sure it is the correct album', snatched['Title'])
verify(snatched['AlbumID'], folder, snatched['Kind'])
continue
# Try to parse the folder name into a valid format
# TODO: Add metadata lookup
try:
name, album, year = helpers.extract_data(folder_basename)
except:
name = None
if name and album and year:
# Attempt 2a: parse the folder name into a valid format
try:
logger.debug('Attempting to extract name, album and year from folder name')
name, album, year = helpers.extract_data(folder_basename)
except Exception as e:
name = album = year = None
if name and album:
release = myDB.action('SELECT AlbumID, ArtistName, AlbumTitle from albums WHERE ArtistName LIKE ? and AlbumTitle LIKE ?', [name, album]).fetchone()
if release:
logger.info('Found a match in the database: %s - %s. Verifying to make sure it is the correct album' % (release['ArtistName'], release['AlbumTitle']))
logger.info('Found a match in the database: %s - %s. Verifying to make sure it is the correct album', release['ArtistName'], release['AlbumTitle'])
verify(release['AlbumID'], folder)
continue
else:
logger.info('Querying MusicBrainz for the release group id for: %s - %s' % (name, album))
logger.info('Querying MusicBrainz for the release group id for: %s - %s', name, album)
from headphones import mb
try:
rgid = mb.findAlbumID(helpers.latinToAscii(name), helpers.latinToAscii(album))
except:
logger.error('Can not get release information for this album')
continue
rgid = None
if rgid:
verify(rgid, folder)
else:
logger.info('No match found on MusicBrainz for: %s - %s' % (name, album))
continue
else:
try:
possible_rgid = folder_basename[-36:]
# re pattern match: [0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}
rgid = uuid.UUID(possible_rgid)
except:
logger.info("Couldn't parse " + folder_basename + " into any valid format. If adding albums from another source, they must be in an 'Artist - Album [Year]' format, or end with the musicbrainz release group id")
continue
if rgid:
rgid = possible_rgid
release = myDB.action('SELECT ArtistName, AlbumTitle, AlbumID from albums WHERE AlbumID=?', [rgid]).fetchone()
if release:
logger.info('Found a match in the database: %s - %s. Verifying to make sure it is the correct album' % (release['ArtistName'], release['AlbumTitle']))
verify(release['AlbumID'], folder, forced=True)
else:
logger.info('Found a (possibly) valid Musicbrainz identifier in album folder name - continuing post-processing')
verify(rgid, folder, forced=True)
logger.info('No match found on MusicBrainz for: %s - %s', name, album)
# Attempt 2b: deduce meta data into a valid format
try:
logger.debug('Attempting to extract name, album and year from metadata')
name, album, year = helpers.extract_metadata(folder)
except Exception as e:
name = album = year = None
if name and album:
release = myDB.action('SELECT AlbumID, ArtistName, AlbumTitle from albums WHERE ArtistName LIKE ? and AlbumTitle LIKE ?', [name, album]).fetchone()
if release:
logger.info('Found a match in the database: %s - %s. Verifying to make sure it is the correct album', release['ArtistName'], release['AlbumTitle'])
verify(release['AlbumID'], folder)
continue
else:
logger.info('Querying MusicBrainz for the release group id for: %s - %s', name, album)
from headphones import mb
try:
rgid = mb.findAlbumID(helpers.latinToAscii(name), helpers.latinToAscii(album))
except:
logger.error('Can not get release information for this album')
rgid = None
if rgid:
verify(rgid, folder)
continue
else:
logger.info('No match found on MusicBrainz for: %s - %s', name, album)
# Attempt 3: strip release group id from filename
try:
logger.debug('Attempting to extract release group from folder name')
possible_rgid = folder_basename[-36:]
# re pattern match: [0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}
rgid = uuid.UUID(possible_rgid)
except:
logger.info("Couldn't parse '%s' into any valid format. If adding albums from another source, they must be in an 'Artist - Album [Year]' format, or end with the musicbrainz release group id", folder_basename)
rgid = possible_rgid = None
if rgid:
rgid = possible_rgid
release = myDB.action('SELECT ArtistName, AlbumTitle, AlbumID from albums WHERE AlbumID=?', [rgid]).fetchone()
if release:
logger.info('Found a match in the database: %s - %s. Verifying to make sure it is the correct album', release['ArtistName'], release['AlbumTitle'])
verify(release['AlbumID'], folder, forced=True)
continue
else:
logger.info('Found a (possibly) valid Musicbrainz identifier in album folder name - continuing post-processing')
verify(rgid, folder, forced=True)
continue
# Attempt 4: Hail mary. Just assume the folder name is the album name if it doesn't have a separator in it
if '-' not in folder:
release = myDB.action('SELECT AlbumID, ArtistName, AlbumTitle from albums WHERE AlbumTitle LIKE ?', [folder]).fetchone()
if release:
logger.info('Found a match in the database: %s - %s. Verifying to make sure it is the correct album', release['ArtistName'], release['AlbumTitle'])
verify(release['AlbumID'], folder)
continue
else:
logger.info('Querying MusicBrainz for the release group id for: %s', folder)
from headphones import mb
try:
rgid = mb.findAlbumID(album=helpers.latinToAscii(folder))
except:
logger.error('Can not get release information for this album')
rgid = None
if rgid:
verify(rgid, folder)
continue
else:
logger.info('No match found on MusicBrainz for: %s - %s', name, album)

115
headphones/request.py Normal file
View File

@@ -0,0 +1,115 @@
from headphones import logger
from xml.dom import minidom
from bs4 import BeautifulSoup
import requests
import feedparser
def request_response(url, method="get", auto_raise=True, whitelist_status_code=None, **kwargs):
"""
Convenient wrapper for `requests.get', which will capture the exceptions and
log them. On success, the Response object is returned. In case of a
exception, None is returned.
"""
# Convert whitelist_status_code to a list if needed
if whitelist_status_code and type(whitelist_status_code) != list:
whitelist_status_code = [whitelist_status_code]
# Map method to the request.XXX method. This is a simple hack, but it allows
# requests to apply more magic per method. See lib/requests/api.py.
request_method = getattr(requests, method)
try:
# Request the URL
logger.debug("Requesting URL via %s method: %s", method.upper(), url)
response = request_method(url, **kwargs)
# If status code != OK, then raise exception, except if the status code
# is white listed.
if whitelist_status_code and auto_raise:
if response.status_code not in whitelist_status_code:
logger.debug("Response status code %d is not white listed, raising exception", response.status_code)
response.raise_for_status()
elif auto_raise:
response.raise_for_status()
return response
except requests.ConnectionError:
logger.error("Unable to connect to remote host.")
except requests.Timeout:
logger.error("Request timed out.")
except requests.HTTPError, e:
if e.response is not None:
logger.error("Request raise HTTP error with status code: %d", e.response.status_code)
else:
logger.error("Request raised HTTP error.")
except requests.RequestException, e:
logger.error("Request raised exception: %s", e)
def request_soup(url, **kwargs):
"""
Wrapper for `request_response', which will return a BeatifulSoup object if
no exceptions are raised.
"""
parser = kwargs.pop("parser", "html5lib")
response = request_response(url, **kwargs)
if response is not None:
return BeautifulSoup(response.content, parser)
def request_minidom(url, **kwargs):
"""
Wrapper for `request_response', which will return a Minidom object if no
exceptions are raised.
"""
response = request_response(url, **kwargs)
if response is not None:
return minidom.parseString(response.content)
def request_json(url, **kwargs):
"""
Wrapper for `request_response', which will decode the response as JSON
object and return the result, if no exceptions are raised.
As an option, a validator callback can be given, which should return True if
the result is valid.
"""
validator = kwargs.pop("validator", None)
response = request_response(url, **kwargs)
if response is not None:
try:
result = response.json()
if validator and not validator(result):
logger.error("JSON validation result vailed")
else:
return result
except ValueError:
logger.error("Response returned invalid JSON data")
def request_content(url, **kwargs):
"""
Wrapper for `request_response', which will return the raw content.
"""
response = request_response(url, **kwargs)
if response is not None:
return response.content
def request_feed(url, **kwargs):
"""
Wrapper for `request_response', which will return a feed object.
"""
response = request_response(url, **kwargs)
if response is not None:
return feedparser.parse(response.content)

View File

@@ -119,6 +119,10 @@ def sendNZB(nzb):
if sabText == "ok":
logger.info(u"NZB sent to SAB successfully")
if headphones.GROWL_ENABLED and headphones.GROWL_ONSNATCH:
logger.info(u"Sending Growl notification")
growl = notifiers.GROWL()
growl.notify(nzb.name,"Download started")
if headphones.PROWL_ENABLED and headphones.PROWL_ONSNATCH:
logger.info(u"Sending Prowl notification")
prowl = notifiers.PROWL()

File diff suppressed because it is too large Load Diff

View File

@@ -13,30 +13,30 @@
# You should have received a copy of the GNU General Public License
# along with Headphones. If not, see <http://www.gnu.org/licenses/>.
import headphones
from headphones import logger, notifiers
import urllib2
import lib.simplejson as json
import base64
import time
import re
import time
import base64
import headphones
import simplejson as json
from headphones import logger, notifiers, request
# This is just a simple script to send torrents to transmission. The
# intention is to turn this into a class where we can check the state
# of the download, set the download dir, etc.
# of the download, set the download dir, etc.
# TODO: Store the session id so we don't need to make 2 calls
# Store torrent id so we can check up on it
def addTorrent(link):
method = 'torrent-add'
arguments = {'filename': link, 'download-dir':headphones.DOWNLOAD_TORRENT_DIR}
arguments = {'filename': link, 'download-dir': headphones.DOWNLOAD_TORRENT_DIR}
response = torrentAction(method,arguments)
if not response:
return False
if response['result'] == 'success':
if 'torrent-added' in response['arguments']:
name = response['arguments']['torrent-added']['name']
@@ -49,14 +49,18 @@ def addTorrent(link):
retid = False
logger.info(u"Torrent sent to Transmission successfully")
if headphones.GROWL_ENABLED and headphones.GROWL_ONSNATCH:
logger.info(u"Sending Growl notification")
growl = notifiers.GROWL()
growl.notify(name,"Download started")
if headphones.PROWL_ENABLED and headphones.PROWL_ONSNATCH:
logger.info(u"Sending Prowl notification")
prowl = notifiers.PROWL()
prowl.notify(name,"Download started")
if headphones.PUSHOVER_ENABLED and headphones.PUSHOVER_ONSNATCH:
logger.info(u"Sending Pushover notification")
prowl = notifiers.PUSHOVER()
prowl.notify(name,"Download started")
pushover = notifiers.PUSHOVER()
pushover.notify(name,"Download started")
if headphones.TWITTER_ENABLED and headphones.TWITTER_ONSNATCH:
logger.info(u"Sending Twitter notification")
twitter = notifiers.TwitterNotifier()
@@ -71,38 +75,38 @@ def addTorrent(link):
pushalot.notify(name,"Download started")
return retid
else:
logger.info('Transmission returned status %s' % response['result'])
return False
def getTorrentFolder(torrentid):
method = 'torrent-get'
arguments = { 'ids': torrentid, 'fields': ['name','percentDone']}
response = torrentAction(method, arguments)
percentdone = response['arguments']['torrents'][0]['percentDone']
torrent_folder_name = response['arguments']['torrents'][0]['name']
tries = 1
while percentdone == 0 and tries <10:
tries+=1
time.sleep(5)
response = torrentAction(method, arguments)
percentdone = response['arguments']['torrents'][0]['percentDone']
torrent_folder_name = response['arguments']['torrents'][0]['name']
return torrent_folder_name
def torrentAction(method, arguments):
host = headphones.TRANSMISSION_HOST
username = headphones.TRANSMISSION_USERNAME
password = headphones.TRANSMISSION_PASSWORD
sessionid = None
if not host.startswith('http'):
host = 'http://' + host
@@ -127,36 +131,32 @@ def torrentAction(method, arguments):
logger.error('Transmission port missing')
return
request = urllib2.Request(host)
# Retrieve session id
if username and password:
base64string = base64.encodestring('%s:%s' % (username, password)).replace('\n', '')
request.add_header("Authorization", "Basic %s" % base64string)
opener = urllib2.build_opener()
try:
data = opener.open(request).read()
except urllib2.HTTPError, e:
if e.code == 409:
sessionid = e.hdrs['x-transmission-session-id']
else:
logger.error('Could not connect to Transmission. Error: ' + str(e))
except Exception, e:
logger.error('Could not connect to Transmission. Error: ' + str(e))
response = request.request_response(host, auth=(username, password), whitelist_status_code=409)
else:
response = request.request_response(host, whitelist_status_code=409)
if response is None:
logger.error("Error gettings Transmission session ID")
return
# Parse session id
if response.status_code == 409:
sessionid = response.headers['x-transmission-session-id']
if not sessionid:
logger.error("Error getting Session ID from Transmission")
return
request.add_header('x-transmission-session-id', sessionid)
postdata = json.dumps({ 'method': method,
'arguments': arguments })
request.add_data(postdata)
try:
response = json.loads(opener.open(request).read())
except Exception, e:
logger.error("Error sending torrent to Transmission: " + str(e))
# Prepare next request
headers = { 'x-transmission-session-id': sessionid }
data = { 'method': method, 'arguments': arguments }
response = request.request_json(host, method="post", data=json.dumps(data), headers=headers)
if not response:
logger.error("Error sending torrent to Transmission")
return
return response

View File

@@ -13,10 +13,10 @@
# You should have received a copy of the GNU General Public License
# along with Headphones. If not, see <http://www.gnu.org/licenses/>.
import platform, subprocess, re, os, urllib2, tarfile
import platform, subprocess, re, os, tarfile
import headphones
from headphones import logger, version
from headphones import logger, version, request
from headphones.exceptions import ex
import lib.simplejson as simplejson
@@ -31,20 +31,20 @@ def runGit(args):
if platform.system().lower() == 'darwin':
git_locations.append('/usr/local/git/bin/git')
output = err = None
for cur_git in git_locations:
cmd = cur_git+' '+args
try:
logger.debug('Trying to execute: "' + cmd + '" with shell in ' + headphones.PROG_DIR)
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True, cwd=headphones.PROG_DIR)
output, err = p.communicate()
output = output.strip()
logger.debug('Git output: ' + output)
except OSError:
logger.debug('Command ' + cmd + ' didn\'t work, couldn\'t find git')
logger.debug('Command failed: %s', cmd)
continue
if 'not found' in output or "not recognized as an internal or external command" in output:
@@ -65,7 +65,7 @@ def getVersion():
headphones.INSTALL_TYPE = 'win'
# Don't have a way to update exe yet, but don't want to set VERSION to None
return 'Windows Install'
return 'Windows Install', 'master'
elif os.path.isdir(os.path.join(headphones.PROG_DIR, '.git')):
@@ -74,15 +74,29 @@ def getVersion():
if not output:
logger.error('Couldn\'t find latest installed version.')
return None
cur_commit_hash = None
cur_commit_hash = output.strip()
cur_commit_hash = output
if not re.match('^[a-z0-9]+$', cur_commit_hash):
logger.error('Output doesn\'t look like a hash, not using it')
return None
cur_commit_hash = None
return cur_commit_hash
if headphones.DO_NOT_OVERRIDE_GIT_BRANCH and headphones.GIT_BRANCH:
branch_name = headphones.GIT_BRANCH
else:
branch_name, err = runGit('rev-parse --abbrev-ref HEAD')
branch_name = branch_name
if not branch_name and headphones.GIT_BRANCH:
logger.error('Could not retrieve branch name from git. Falling back to %s' % headphones.GIT_BRANCH)
branch_name = headphones.GIT_BRANCH
if not branch_name:
logger.error('Could not retrieve branch name from git. Defaulting to master')
branch_name = 'master'
return cur_commit_hash, branch_name
else:
@@ -91,69 +105,65 @@ def getVersion():
version_file = os.path.join(headphones.PROG_DIR, 'version.txt')
if not os.path.isfile(version_file):
return None
return None, 'master'
fp = open(version_file, 'r')
current_version = fp.read().strip(' \n\r')
fp.close()
if current_version:
return current_version
return current_version, headphones.GIT_BRANCH
else:
return None
return None, 'master'
def checkGithub():
headphones.COMMITS_BEHIND = 0
# Get the latest commit available from github
# Get the latest version available from github
logger.info('Retrieving latest version information from GitHub')
url = 'https://api.github.com/repos/%s/headphones/commits/%s' % (headphones.GIT_USER, headphones.GIT_BRANCH)
logger.info('Retrieving latest version information from github')
try:
result = urllib2.urlopen(url, timeout=20).read()
git = simplejson.JSONDecoder().decode(result)
headphones.LATEST_VERSION = git['sha']
except:
logger.warn('Could not get the latest commit from github')
headphones.COMMITS_BEHIND = 0
version = request.request_json(url, timeout=20, validator=lambda x: type(x) == dict)
if version is None:
logger.warn('Could not get the latest version from GitHub. Are you running a local development version?')
return headphones.CURRENT_VERSION
headphones.LATEST_VERSION = version['sha']
logger.debug("Latest version is %s", headphones.LATEST_VERSION)
# See how many commits behind we are
if headphones.CURRENT_VERSION:
logger.info('Comparing currently installed version with latest github version')
url = 'https://api.github.com/repos/%s/headphones/compare/%s...%s' % (headphones.GIT_USER, headphones.CURRENT_VERSION, headphones.LATEST_VERSION)
try:
result = urllib2.urlopen(url, timeout=20).read()
git = simplejson.JSONDecoder().decode(result)
headphones.COMMITS_BEHIND = git['total_commits']
except:
logger.warn('Could not get commits behind from github')
headphones.COMMITS_BEHIND = 0
return headphones.CURRENT_VERSION
if headphones.COMMITS_BEHIND >= 1:
logger.info('New version is available. You are %s commits behind' % headphones.COMMITS_BEHIND)
elif headphones.COMMITS_BEHIND == 0:
logger.info('Headphones is up to date')
elif headphones.COMMITS_BEHIND == -1:
logger.info('You are running an unknown version of Headphones. Run the updater to identify your version')
else:
if not headphones.CURRENT_VERSION:
logger.info('You are running an unknown version of Headphones. Run the updater to identify your version')
return headphones.CURRENT_VERSION
logger.info('Comparing currently installed version with latest GitHub version')
url = 'https://api.github.com/repos/%s/headphones/compare/%s...%s' % (headphones.GIT_USER, headphones.CURRENT_VERSION, headphones.LATEST_VERSION)
commits = request.request_json(url, timeout=20, whitelist_status_code=404, validator=lambda x: type(x) == dict)
if commits is None:
logger.warn('Could not get commits behind from GitHub.')
return headphones.CURRENT_VERSION
try:
headphones.COMMITS_BEHIND = int(commits['behind_by'])
logger.debug("In total, %d commits behind", headphones.COMMITS_BEHIND)
except KeyError:
logger.info('Cannot compare versions. Are you running a local development version?')
headphones.COMMITS_BEHIND = 0
if headphones.COMMITS_BEHIND > 0:
logger.info('New version is available. You are %s commits behind' % headphones.COMMITS_BEHIND)
elif headphones.COMMITS_BEHIND == 0:
logger.info('Headphones is up to date')
return headphones.LATEST_VERSION
def update():
if headphones.INSTALL_TYPE == 'win':
logger.info('Windows .exe updating not supported yet.')
pass
elif headphones.INSTALL_TYPE == 'git':
output, err = runGit('pull origin ' + version.HEADPHONES_VERSION)
output, err = runGit('pull origin ' + headphones.GIT_BRANCH)
if not output:
logger.error('Couldn\'t download latest version')
@@ -168,20 +178,18 @@ def update():
logger.info('Output: ' + str(output))
else:
tar_download_url = 'https://github.com/%s/headphones/tarball/%s' % (headphones.GIT_USER, headphones.GIT_BRANCH)
update_dir = os.path.join(headphones.PROG_DIR, 'update')
version_path = os.path.join(headphones.PROG_DIR, 'version.txt')
try:
logger.info('Downloading update from: '+tar_download_url)
data = urllib2.urlopen(tar_download_url)
except (IOError, URLError):
logger.error("Unable to retrieve new version from "+tar_download_url+", can't update")
logger.info('Downloading update from: '+tar_download_url)
data = request.request_content(tar_download_url)
if not data:
logger.error("Unable to retrieve new version from '%s', can't update", tar_download_url)
return
download_name = data.geturl().split('/')[-1]
tar_download_path = os.path.join(headphones.PROG_DIR, download_name)
# Save tar to disk
@@ -190,13 +198,13 @@ def update():
f.close()
# Extract the tar to update folder
logger.info('Extracing file' + tar_download_path)
logger.info('Extracting file: ' + tar_download_path)
tar = tarfile.open(tar_download_path)
tar.extractall(update_dir)
tar.close()
# Delete the tar.gz
logger.info('Deleting file' + tar_download_path)
logger.info('Deleting file: ' + tar_download_path)
os.remove(tar_download_path)
# Find update dir name

View File

@@ -85,7 +85,7 @@ class WebInterface(object):
raise cherrypy.HTTPRedirect("home")
# Serve the extras up as a dict to make things easier for new templates
extras_list = ["single", "ep", "compilation", "soundtrack", "live", "remix", "spokenword", "audiobook"]
extras_list = ["single", "ep", "compilation", "soundtrack", "live", "remix", "spokenword", "audiobook", "other"]
extras_dict = {}
if not artist['Extras']:
@@ -134,7 +134,6 @@ class WebInterface(object):
def addArtist(self, artistid):
threading.Thread(target=importer.addArtisttoDB, args=[artistid]).start()
threading.Thread(target=lastfm.getSimilar).start()
raise cherrypy.HTTPRedirect("artistPage?ArtistID=%s" % artistid)
addArtist.exposed = True
@@ -144,12 +143,12 @@ class WebInterface(object):
#
# If they are, we need to convert kwargs to string format
if not newstyle:
extras = "1,2,3,4,5,6,7,8"
extras = "1,2,3,4,5,6,7,8,9"
else:
temp_extras_list = []
# TODO: Put these extras as a global variable
i = 1
for extra in ["single", "ep", "compilation", "soundtrack", "live", "remix", "spokenword", "audiobook"]:
for extra in ["single", "ep", "compilation", "soundtrack", "live", "remix", "spokenword", "audiobook", "other"]:
if extra in kwargs:
temp_extras_list.append(i)
i += 1
@@ -175,6 +174,7 @@ class WebInterface(object):
myDB.action('DELETE from albums WHERE ArtistID=? AND AlbumID=?', [ArtistID, album['AlbumID']])
myDB.action('DELETE from allalbums WHERE ArtistID=? AND AlbumID=?', [ArtistID, album['AlbumID']])
myDB.action('DELETE from alltracks WHERE ArtistID=? AND AlbumID=?', [ArtistID, album['AlbumID']])
myDB.action('DELETE from releases WHERE ReleaseGroupID=?', album['AlbumID'])
raise cherrypy.HTTPRedirect("artistPage?ArtistID=%s" % ArtistID)
removeExtras.exposed = True
@@ -203,8 +203,18 @@ class WebInterface(object):
for name in namecheck:
artistname=name['ArtistName']
myDB.action('DELETE from artists WHERE ArtistID=?', [ArtistID])
rgids = myDB.select('SELECT DISTINCT ReleaseGroupID FROM albums JOIN releases ON AlbumID = ReleaseGroupID WHERE ArtistID=?', [ArtistID])
for rgid in rgids:
myDB.action('DELETE from releases WHERE ReleaseGroupID=?', [rgid['ReleaseGroupID']])
myDB.action('DELETE from albums WHERE ArtistID=?', [ArtistID])
myDB.action('DELETE from tracks WHERE ArtistID=?', [ArtistID])
rgids = myDB.select('SELECT DISTINCT ReleaseGroupID FROM allalbums JOIN releases ON AlbumID = ReleaseGroupID WHERE ArtistID=?', [ArtistID])
for rgid in rgids:
myDB.action('DELETE from releases WHERE ReleaseGroupID=?', [rgid['ReleaseGroupID']])
myDB.action('DELETE from allalbums WHERE ArtistID=?', [ArtistID])
myDB.action('DELETE from alltracks WHERE ArtistID=?', [ArtistID])
myDB.action('UPDATE have SET Matched=NULL WHERE ArtistName=?', [artistname])
@@ -219,8 +229,18 @@ class WebInterface(object):
for ArtistID in emptyArtistIDs:
logger.info(u"Deleting all traces of artist: " + ArtistID)
myDB.action('DELETE from artists WHERE ArtistID=?', [ArtistID])
rgids = myDB.select('SELECT DISTINCT ReleaseGroupID FROM albums JOIN releases ON AlbumID = ReleaseGroupID WHERE ArtistID=?', [ArtistID])
for rgid in rgids:
myDB.action('DELETE from releases WHERE ReleaseGroupID=?', [rgid['ReleaseGroupID']])
myDB.action('DELETE from albums WHERE ArtistID=?', [ArtistID])
myDB.action('DELETE from tracks WHERE ArtistID=?', [ArtistID])
rgids = myDB.select('SELECT DISTINCT ReleaseGroupID FROM allalbums JOIN releases ON AlbumID = ReleaseGroupID WHERE ArtistID=?', [ArtistID])
for rgid in rgids:
myDB.action('DELETE from releases WHERE ReleaseGroupID=?', [rgid['ReleaseGroupID']])
myDB.action('DELETE from allalbums WHERE ArtistID=?', [ArtistID])
myDB.action('DELETE from alltracks WHERE ArtistID=?', [ArtistID])
myDB.action('INSERT OR REPLACE into blacklist VALUES (?)', [ArtistID])
@@ -283,6 +303,50 @@ class WebInterface(object):
raise cherrypy.HTTPRedirect(redirect)
queueAlbum.exposed = True
def choose_specific_download(self, AlbumID):
results = searcher.searchforalbum(AlbumID, choose_specific_download=True)
results_as_dicts = []
for result in results:
result_dict = {
'title':result[0],
'size':result[1],
'url':result[2],
'provider':result[3],
'kind':result[4]
}
results_as_dicts.append(result_dict)
s = simplejson.dumps(results_as_dicts)
cherrypy.response.headers['Content-type'] = 'application/json'
return s
choose_specific_download.exposed = True
def download_specific_release(self, AlbumID, title, size, url, provider, kind, **kwargs):
# Handle situations where the torrent url contains arguments that are parsed
if kwargs:
import urllib, urllib2
url = urllib2.quote(url, safe=":?/=&") + '&' + urllib.urlencode(kwargs)
try:
result = [(title,int(size),url,provider,kind)]
except ValueError:
result = [(title,float(size),url,provider,kind)]
logger.info(u"Making sure we can download the chosen result")
(data, bestqual) = searcher.preprocess(result)
if data and bestqual:
myDB = db.DBConnection()
album = myDB.action('SELECT * from albums WHERE AlbumID=?', [AlbumID]).fetchone()
searcher.send_to_downloader(data, bestqual, album)
download_specific_release.exposed = True
def unqueueAlbum(self, AlbumID, ArtistID):
logger.info(u"Marking album: " + AlbumID + "as skipped...")
myDB = db.DBConnection()
@@ -299,6 +363,7 @@ class WebInterface(object):
myDB.action('DELETE from tracks WHERE AlbumID=?', [AlbumID])
myDB.action('DELETE from allalbums WHERE AlbumID=?', [AlbumID])
myDB.action('DELETE from alltracks WHERE AlbumID=?', [AlbumID])
myDB.action('DELETE from releases WHERE ReleaseGroupID=?', [AlbumID])
if ArtistID:
raise cherrypy.HTTPRedirect("artistPage?ArtistID=%s" % ArtistID)
else:
@@ -552,8 +617,18 @@ class WebInterface(object):
for ArtistID in args:
if action == 'delete':
myDB.action('DELETE from artists WHERE ArtistID=?', [ArtistID])
rgids = myDB.select('SELECT DISTINCT ReleaseGroupID FROM albums JOIN releases ON AlbumID = ReleaseGroupID WHERE ArtistID=?', [ArtistID])
for rgid in rgids:
myDB.action('DELETE from releases WHERE ReleaseGroupID=?', [rgid['ReleaseGroupID']])
myDB.action('DELETE from albums WHERE ArtistID=?', [ArtistID])
myDB.action('DELETE from tracks WHERE ArtistID=?', [ArtistID])
rgids = myDB.select('SELECT DISTINCT ReleaseGroupID FROM allalbums JOIN releases ON AlbumID = ReleaseGroupID WHERE ArtistID=?', [ArtistID])
for rgid in rgids:
myDB.action('DELETE from releases WHERE ReleaseGroupID=?', [rgid['ReleaseGroupID']])
myDB.action('DELETE from allalbums WHERE AlbumID=?', [AlbumID])
myDB.action('DELETE from alltracks WHERE AlbumID=?', [AlbumID])
myDB.action('INSERT OR REPLACE into blacklist VALUES (?)', [ArtistID])
@@ -627,9 +702,9 @@ class WebInterface(object):
raise cherrypy.HTTPRedirect("home")
forceSearch.exposed = True
def forcePostProcess(self):
def forcePostProcess(self, dir=None, album_dir=None):
from headphones import postprocessor
threading.Thread(target=postprocessor.forcePostProcess).start()
threading.Thread(target=postprocessor.forcePostProcess, kwargs={'dir':dir,'album_dir':album_dir}).start()
raise cherrypy.HTTPRedirect("home")
forcePostProcess.exposed = True
@@ -659,7 +734,7 @@ class WebInterface(object):
if sSearch == "":
filtered = headphones.LOG_LIST[::]
else:
filtered = [row for row in headphones.LOG_LIST for column in row if sSearch in column]
filtered = [row for row in headphones.LOG_LIST for column in row if sSearch.lower() in column.lower()]
sortcolumn = 0
if iSortCol_0 == '1':
@@ -768,14 +843,18 @@ class WebInterface(object):
return json_albums
getAlbumsByArtist_json.exposed=True
def clearhistory(self, type=None):
def clearhistory(self, type=None, date_added=None, title=None):
myDB = db.DBConnection()
if type == 'all':
logger.info(u"Clearing all history")
myDB.action('DELETE from snatched')
if type:
if type == 'all':
logger.info(u"Clearing all history")
myDB.action('DELETE from snatched')
else:
logger.info(u"Clearing history where status is %s" % type)
myDB.action('DELETE from snatched WHERE Status=?', [type])
else:
logger.info(u"Clearing history where status is %s" % type)
myDB.action('DELETE from snatched WHERE Status=?', [type])
logger.info(u"Deleting '%s' from history" % title)
myDB.action('DELETE from snatched WHERE Title=? AND DateAdded=?', [title, date_added])
raise cherrypy.HTTPRedirect("history")
clearhistory.exposed = True
@@ -907,6 +986,7 @@ class WebInterface(object):
"album_art_format" : headphones.ALBUM_ART_FORMAT,
"embed_album_art" : checked(headphones.EMBED_ALBUM_ART),
"embed_lyrics" : checked(headphones.EMBED_LYRICS),
"replace_existing_folders" : checked(headphones.REPLACE_EXISTING_FOLDERS),
"dest_dir" : headphones.DESTINATION_DIR,
"lossless_dest_dir" : headphones.LOSSLESS_DESTINATION_DIR,
"folder_format" : headphones.FOLDER_FORMAT,
@@ -916,6 +996,10 @@ class WebInterface(object):
"autowant_upcoming" : checked(headphones.AUTOWANT_UPCOMING),
"autowant_all" : checked(headphones.AUTOWANT_ALL),
"keep_torrent_files" : checked(headphones.KEEP_TORRENT_FILES),
"prefer_torrents_0" : radio(headphones.PREFER_TORRENTS, 0),
"prefer_torrents_1" : radio(headphones.PREFER_TORRENTS, 1),
"prefer_torrents_2" : radio(headphones.PREFER_TORRENTS, 2),
"open_magnet_links" : checked(headphones.OPEN_MAGNET_LINKS),
"log_dir" : headphones.LOG_DIR,
"cache_dir" : headphones.CACHE_DIR,
"interface_list" : interface_list,
@@ -930,7 +1014,13 @@ class WebInterface(object):
"encodervbrcbr": headphones.ENCODERVBRCBR,
"encoderquality": headphones.ENCODERQUALITY,
"encoderlossless": checked(headphones.ENCODERLOSSLESS),
"encoder_multicore": checked(headphones.ENCODER_MULTICORE),
"encoder_multicore_count": int(headphones.ENCODER_MULTICORE_COUNT),
"delete_lossless_files": checked(headphones.DELETE_LOSSLESS_FILES),
"growl_enabled": checked(headphones.GROWL_ENABLED),
"growl_onsnatch": checked(headphones.GROWL_ONSNATCH),
"growl_host": headphones.GROWL_HOST,
"growl_password": headphones.GROWL_PASSWORD,
"prowl_enabled": checked(headphones.PROWL_ENABLED),
"prowl_onsnatch": checked(headphones.PROWL_ONSNATCH),
"prowl_keys": headphones.PROWL_KEYS,
@@ -941,6 +1031,8 @@ class WebInterface(object):
"xbmc_password": headphones.XBMC_PASSWORD,
"xbmc_update": checked(headphones.XBMC_UPDATE),
"xbmc_notify": checked(headphones.XBMC_NOTIFY),
"lms_enabled": checked(headphones.LMS_ENABLED),
"lms_host": headphones.LMS_HOST,
"plex_enabled": checked(headphones.PLEX_ENABLED),
"plex_server_host": headphones.PLEX_SERVER_HOST,
"plex_client_host": headphones.PLEX_CLIENT_HOST,
@@ -959,6 +1051,7 @@ class WebInterface(object):
"pushover_enabled": checked(headphones.PUSHOVER_ENABLED),
"pushover_onsnatch": checked(headphones.PUSHOVER_ONSNATCH),
"pushover_keys": headphones.PUSHOVER_KEYS,
"pushover_apitoken": headphones.PUSHOVER_APITOKEN,
"pushover_priority": headphones.PUSHOVER_PRIORITY,
"pushbullet_enabled": checked(headphones.PUSHBULLET_ENABLED),
"pushbullet_onsnatch": checked(headphones.PUSHBULLET_ONSNATCH),
@@ -983,7 +1076,7 @@ class WebInterface(object):
}
# Need to convert EXTRAS to a dictionary we can pass to the config: it'll come in as a string like 2,5,6,8
extras_list = ["single", "ep", "compilation", "soundtrack", "live", "remix", "spokenword", "audiobook"]
extras_list = ["single", "ep", "compilation", "soundtrack", "live", "remix", "spokenword", "audiobook", "other"]
extras_dict = {}
i = 1
@@ -1006,16 +1099,16 @@ class WebInterface(object):
use_headphones_indexer=0, newznab=0, newznab_host=None, newznab_apikey=None, newznab_enabled=0, nzbsorg=0, nzbsorg_uid=None, nzbsorg_hash=None, nzbsrus=0, nzbsrus_uid=None, nzbsrus_apikey=None, omgwtfnzbs=0, omgwtfnzbs_uid=None, omgwtfnzbs_apikey=None,
preferred_words=None, required_words=None, ignored_words=None, preferred_quality=0, preferred_bitrate=None, detect_bitrate=0, move_files=0, torrentblackhole_dir=None, download_torrent_dir=None,
numberofseeders=None, use_piratebay=0, piratebay_proxy_url=None, use_isohunt=0, use_kat=0, use_mininova=0, waffles=0, waffles_uid=None, waffles_passkey=None, whatcd=0, whatcd_username=None, whatcd_password=None,
rutracker=0, rutracker_user=None, rutracker_password=None, rename_files=0, correct_metadata=0, cleanup_files=0, add_album_art=0, album_art_format=None, embed_album_art=0, embed_lyrics=0,
rutracker=0, rutracker_user=None, rutracker_password=None, rename_files=0, correct_metadata=0, cleanup_files=0, add_album_art=0, album_art_format=None, embed_album_art=0, embed_lyrics=0, replace_existing_folders=False,
destination_dir=None, lossless_destination_dir=None, folder_format=None, file_format=None, file_underscores=0, include_extras=0, single=0, ep=0, compilation=0, soundtrack=0, live=0,
remix=0, spokenword=0, audiobook=0, autowant_upcoming=False, autowant_all=False, keep_torrent_files=False, interface=None, log_dir=None, cache_dir=None, music_encoder=0, encoder=None, xldprofile=None,
remix=0, spokenword=0, audiobook=0, other=0, autowant_upcoming=False, autowant_all=False, keep_torrent_files=False, prefer_torrents=0, open_magnet_links=0, interface=None, log_dir=None, cache_dir=None, music_encoder=0, encoder=None, xldprofile=None,
bitrate=None, samplingfrequency=None, encoderfolder=None, advancedencoder=None, encoderoutputformat=None, encodervbrcbr=None, encoderquality=None, encoderlossless=0,
delete_lossless_files=0, prowl_enabled=0, prowl_onsnatch=0, prowl_keys=None, prowl_priority=0, xbmc_enabled=0, xbmc_host=None, xbmc_username=None, xbmc_password=None,
xbmc_update=0, xbmc_notify=0, nma_enabled=False, nma_apikey=None, nma_priority=0, nma_onsnatch=0, pushalot_enabled=False, pushalot_apikey=None, pushalot_onsnatch=0, synoindex_enabled=False,
pushover_enabled=0, pushover_onsnatch=0, pushover_keys=None, pushover_priority=0, pushbullet_enabled=0, pushbullet_onsnatch=0, pushbullet_apikey=None, pushbullet_deviceid=None, twitter_enabled=0, twitter_onsnatch=0, mirror=None, customhost=None, customport=None,
delete_lossless_files=0, growl_enabled=0, growl_onsnatch=0, growl_host=None, growl_password=None, prowl_enabled=0, prowl_onsnatch=0, prowl_keys=None, prowl_priority=0, xbmc_enabled=0, xbmc_host=None, xbmc_username=None, xbmc_password=None,
xbmc_update=0, xbmc_notify=0, nma_enabled=False, nma_apikey=None, nma_priority=0, nma_onsnatch=0, pushalot_enabled=False, pushalot_apikey=None, pushalot_onsnatch=0, synoindex_enabled=False, lms_enabled=0, lms_host=None,
pushover_enabled=0, pushover_onsnatch=0, pushover_keys=None, pushover_priority=0, pushover_apitoken=None, pushbullet_enabled=0, pushbullet_onsnatch=0, pushbullet_apikey=None, pushbullet_deviceid=None, twitter_enabled=0, twitter_onsnatch=0, mirror=None, customhost=None, customport=None,
customsleep=None, hpuser=None, hppass=None, preferred_bitrate_high_buffer=None, preferred_bitrate_low_buffer=None, preferred_bitrate_allow_lossless=0, cache_sizemb=None,
enable_https=0, https_cert=None, https_key=None, file_permissions=None, folder_permissions=None, plex_enabled=0, plex_server_host=None, plex_client_host=None, plex_username=None,
plex_password=None, plex_update=0, plex_notify=0, songkick_enabled=0, songkick_apikey=None, songkick_location=None, songkick_filter_enabled=0, **kwargs):
plex_password=None, plex_update=0, plex_notify=0, songkick_enabled=0, songkick_apikey=None, songkick_location=None, songkick_filter_enabled=0, encoder_multicore=False, encoder_multicore_count=0, **kwargs):
headphones.HTTP_HOST = http_host
headphones.HTTP_PORT = http_port
@@ -1100,6 +1193,7 @@ class WebInterface(object):
headphones.ALBUM_ART_FORMAT = album_art_format
headphones.EMBED_ALBUM_ART = embed_album_art
headphones.EMBED_LYRICS = embed_lyrics
headphones.REPLACE_EXISTING_FOLDERS = replace_existing_folders
headphones.DESTINATION_DIR = destination_dir
headphones.LOSSLESS_DESTINATION_DIR = lossless_destination_dir
headphones.FOLDER_FORMAT = folder_format
@@ -1109,6 +1203,8 @@ class WebInterface(object):
headphones.AUTOWANT_UPCOMING = autowant_upcoming
headphones.AUTOWANT_ALL = autowant_all
headphones.KEEP_TORRENT_FILES = keep_torrent_files
headphones.PREFER_TORRENTS = int(prefer_torrents)
headphones.OPEN_MAGNET_LINKS = open_magnet_links
headphones.INTERFACE = interface
headphones.LOG_DIR = log_dir
headphones.CACHE_DIR = cache_dir
@@ -1123,7 +1219,13 @@ class WebInterface(object):
headphones.ENCODERVBRCBR = encodervbrcbr
headphones.ENCODERQUALITY = int(encoderquality)
headphones.ENCODERLOSSLESS = int(encoderlossless)
headphones.ENCODER_MULTICORE = encoder_multicore
headphones.ENCODER_MULTICORE_COUNT = max(0, int(encoder_multicore_count))
headphones.DELETE_LOSSLESS_FILES = int(delete_lossless_files)
headphones.GROWL_ENABLED = growl_enabled
headphones.GROWL_ONSNATCH = growl_onsnatch
headphones.GROWL_HOST = growl_host
headphones.GROWL_PASSWORD = growl_password
headphones.PROWL_ENABLED = prowl_enabled
headphones.PROWL_ONSNATCH = prowl_onsnatch
headphones.PROWL_KEYS = prowl_keys
@@ -1134,6 +1236,8 @@ class WebInterface(object):
headphones.XBMC_PASSWORD = xbmc_password
headphones.XBMC_UPDATE = xbmc_update
headphones.XBMC_NOTIFY = xbmc_notify
headphones.LMS_ENABLED = lms_enabled
headphones.LMS_HOST = lms_host
headphones.PLEX_ENABLED = plex_enabled
headphones.PLEX_SERVER_HOST = plex_server_host
headphones.PLEX_CLIENT_HOST = plex_client_host
@@ -1153,6 +1257,7 @@ class WebInterface(object):
headphones.PUSHOVER_ONSNATCH = pushover_onsnatch
headphones.PUSHOVER_KEYS = pushover_keys
headphones.PUSHOVER_PRIORITY = pushover_priority
headphones.PUSHOVER_APITOKEN = pushover_apitoken
headphones.PUSHBULLET_ENABLED = pushbullet_enabled
headphones.PUSHBULLET_ONSNATCH = pushbullet_onsnatch
headphones.PUSHBULLET_APIKEY = pushbullet_apikey
@@ -1191,7 +1296,7 @@ class WebInterface(object):
# Convert the extras to list then string. Coming in as 0 or 1
temp_extras_list = []
extras_list = [single, ep, compilation, soundtrack, live, remix, spokenword, audiobook]
extras_list = [single, ep, compilation, soundtrack, live, remix, spokenword, audiobook, other]
i = 1
for extra in extras_list:

View File

@@ -62,7 +62,7 @@ def initialize(options={}):
else:
protocol = "http"
logger.info(u"Starting Headphones on " + protocol + "://" + str(options['http_host']) + ":" + str(options['http_port']) + "/")
logger.info("Starting Headphones on %s://%s:%d/", protocol, options['http_host'], options['http_port'])
cherrypy.config.update(options_dict)
conf = {

View File

@@ -1,17 +0,0 @@
"""
HTML parsing library based on the WHATWG "HTML5"
specification. The parser is designed to be compatible with existing
HTML found in the wild and implements well-defined error recovery that
is largely compatible with modern desktop web browsers.
Example usage:
import html5lib
f = open("my_document.html")
tree = html5lib.parse(f)
"""
__version__ = "0.95-dev"
from html5parser import HTMLParser, parse, parseFragment
from treebuilders import getTreeBuilder
from treewalkers import getTreeWalker
from serializer import serialize

File diff suppressed because it is too large Load Diff

View File

@@ -1,127 +0,0 @@
#
# The goal is to finally have a form filler where you pass data for
# each form, using the algorithm for "Seeding a form with initial values"
# See http://www.whatwg.org/specs/web-forms/current-work/#seeding
#
import _base
from html5lib.constants import spaceCharacters
spaceCharacters = u"".join(spaceCharacters)
class SimpleFilter(_base.Filter):
def __init__(self, source, fieldStorage):
_base.Filter.__init__(self, source)
self.fieldStorage = fieldStorage
def __iter__(self):
field_indices = {}
state = None
field_name = None
for token in _base.Filter.__iter__(self):
type = token["type"]
if type in ("StartTag", "EmptyTag"):
name = token["name"].lower()
if name == "input":
field_name = None
field_type = None
input_value_index = -1
input_checked_index = -1
for i,(n,v) in enumerate(token["data"]):
n = n.lower()
if n == u"name":
field_name = v.strip(spaceCharacters)
elif n == u"type":
field_type = v.strip(spaceCharacters)
elif n == u"checked":
input_checked_index = i
elif n == u"value":
input_value_index = i
value_list = self.fieldStorage.getlist(field_name)
field_index = field_indices.setdefault(field_name, 0)
if field_index < len(value_list):
value = value_list[field_index]
else:
value = ""
if field_type in (u"checkbox", u"radio"):
if value_list:
if token["data"][input_value_index][1] == value:
if input_checked_index < 0:
token["data"].append((u"checked", u""))
field_indices[field_name] = field_index + 1
elif input_checked_index >= 0:
del token["data"][input_checked_index]
elif field_type not in (u"button", u"submit", u"reset"):
if input_value_index >= 0:
token["data"][input_value_index] = (u"value", value)
else:
token["data"].append((u"value", value))
field_indices[field_name] = field_index + 1
field_type = None
field_name = None
elif name == "textarea":
field_type = "textarea"
field_name = dict((token["data"])[::-1])["name"]
elif name == "select":
field_type = "select"
attributes = dict(token["data"][::-1])
field_name = attributes.get("name")
is_select_multiple = "multiple" in attributes
is_selected_option_found = False
elif field_type == "select" and field_name and name == "option":
option_selected_index = -1
option_value = None
for i,(n,v) in enumerate(token["data"]):
n = n.lower()
if n == "selected":
option_selected_index = i
elif n == "value":
option_value = v.strip(spaceCharacters)
if option_value is None:
raise NotImplementedError("<option>s without a value= attribute")
else:
value_list = self.fieldStorage.getlist(field_name)
if value_list:
field_index = field_indices.setdefault(field_name, 0)
if field_index < len(value_list):
value = value_list[field_index]
else:
value = ""
if (is_select_multiple or not is_selected_option_found) and option_value == value:
if option_selected_index < 0:
token["data"].append((u"selected", u""))
field_indices[field_name] = field_index + 1
is_selected_option_found = True
elif option_selected_index >= 0:
del token["data"][option_selected_index]
elif field_type is not None and field_name and type == "EndTag":
name = token["name"].lower()
if name == field_type:
if name == "textarea":
value_list = self.fieldStorage.getlist(field_name)
if value_list:
field_index = field_indices.setdefault(field_name, 0)
if field_index < len(value_list):
value = value_list[field_index]
else:
value = ""
yield {"type": "Characters", "data": value}
field_indices[field_name] = field_index + 1
field_name = None
elif name == "option" and field_type == "select":
pass # TODO: part of "option without value= attribute" processing
elif field_type == "textarea":
continue # ignore token
yield token

View File

@@ -1,62 +0,0 @@
import _base
class Filter(_base.Filter):
def __init__(self, source, encoding):
_base.Filter.__init__(self, source)
self.encoding = encoding
def __iter__(self):
state = "pre_head"
meta_found = (self.encoding is None)
pending = []
for token in _base.Filter.__iter__(self):
type = token["type"]
if type == "StartTag":
if token["name"].lower() == u"head":
state = "in_head"
elif type == "EmptyTag":
if token["name"].lower() == u"meta":
# replace charset with actual encoding
has_http_equiv_content_type = False
for (namespace,name),value in token["data"].iteritems():
if namespace != None:
continue
elif name.lower() == u'charset':
token["data"][(namespace,name)] = self.encoding
meta_found = True
break
elif name == u'http-equiv' and value.lower() == u'content-type':
has_http_equiv_content_type = True
else:
if has_http_equiv_content_type and (None, u"content") in token["data"]:
token["data"][(None, u"content")] = u'text/html; charset=%s' % self.encoding
meta_found = True
elif token["name"].lower() == u"head" and not meta_found:
# insert meta into empty head
yield {"type": "StartTag", "name": u"head",
"data": token["data"]}
yield {"type": "EmptyTag", "name": u"meta",
"data": {(None, u"charset"): self.encoding}}
yield {"type": "EndTag", "name": u"head"}
meta_found = True
continue
elif type == "EndTag":
if token["name"].lower() == u"head" and pending:
# insert meta into head (if necessary) and flush pending queue
yield pending.pop(0)
if not meta_found:
yield {"type": "EmptyTag", "name": u"meta",
"data": {(None, u"charset"): self.encoding}}
while pending:
yield pending.pop(0)
meta_found = True
state = "post_head"
if state == "in_head":
pending.append(token)
else:
yield token

View File

@@ -1,8 +0,0 @@
import _base
from html5lib.sanitizer import HTMLSanitizerMixin
class Filter(_base.Filter, HTMLSanitizerMixin):
def __iter__(self):
for token in _base.Filter.__iter__(self):
token = self.sanitize_token(token)
if token: yield token

View File

@@ -1,177 +0,0 @@
import re
baseChar = """[#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] | [#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] | [#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 | [#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] | [#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] | [#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] | [#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] | [#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 | [#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] | [#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] | [#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D | [#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] | [#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] | [#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] | [#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] | [#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] | [#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] | [#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 | [#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] | [#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] | [#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] | [#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] | [#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] | [#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] | [#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] | [#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] | [#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] | [#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] | [#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A | #x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 | #x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] | #x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] | [#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] | [#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C | #x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 | [#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] | [#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] | [#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 | [#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] | [#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B | #x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE | [#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | [#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 | [#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] | [#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]"""
ideographic = """[#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]"""
combiningCharacter = """[#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] | [#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 | [#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] | [#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] | #x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] | [#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] | [#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 | #x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] | [#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC | [#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] | #x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] | [#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] | [#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] | [#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] | [#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] | [#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] | #x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 | [#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] | #x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] | [#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] | [#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] | #x3099 | #x309A"""
digit = """[#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] | [#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | [#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] | [#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]"""
extender = """#x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]"""
letter = " | ".join([baseChar, ideographic])
#Without the
name = " | ".join([letter, digit, ".", "-", "_", combiningCharacter,
extender])
nameFirst = " | ".join([letter, "_"])
reChar = re.compile(r"#x([\d|A-F]{4,4})")
reCharRange = re.compile(r"\[#x([\d|A-F]{4,4})-#x([\d|A-F]{4,4})\]")
def charStringToList(chars):
charRanges = [item.strip() for item in chars.split(" | ")]
rv = []
for item in charRanges:
foundMatch = False
for regexp in (reChar, reCharRange):
match = regexp.match(item)
if match is not None:
rv.append([hexToInt(item) for item in match.groups()])
if len(rv[-1]) == 1:
rv[-1] = rv[-1]*2
foundMatch = True
break
if not foundMatch:
assert len(item) == 1
rv.append([ord(item)] * 2)
rv = normaliseCharList(rv)
return rv
def normaliseCharList(charList):
charList = sorted(charList)
for item in charList:
assert item[1] >= item[0]
rv = []
i = 0
while i < len(charList):
j = 1
rv.append(charList[i])
while i + j < len(charList) and charList[i+j][0] <= rv[-1][1] + 1:
rv[-1][1] = charList[i+j][1]
j += 1
i += j
return rv
#We don't really support characters above the BMP :(
max_unicode = int("FFFF", 16)
def missingRanges(charList):
rv = []
if charList[0] != 0:
rv.append([0, charList[0][0] - 1])
for i, item in enumerate(charList[:-1]):
rv.append([item[1]+1, charList[i+1][0] - 1])
if charList[-1][1] != max_unicode:
rv.append([charList[-1][1] + 1, max_unicode])
return rv
def listToRegexpStr(charList):
rv = []
for item in charList:
if item[0] == item[1]:
rv.append(escapeRegexp(unichr(item[0])))
else:
rv.append(escapeRegexp(unichr(item[0])) + "-" +
escapeRegexp(unichr(item[1])))
return "[%s]"%"".join(rv)
def hexToInt(hex_str):
return int(hex_str, 16)
def escapeRegexp(string):
specialCharacters = (".", "^", "$", "*", "+", "?", "{", "}",
"[", "]", "|", "(", ")", "-")
for char in specialCharacters:
string = string.replace(char, "\\" + char)
if char in string:
print string
return string
#output from the above
nonXmlNameBMPRegexp = re.compile(u'[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')
nonXmlNameFirstBMPRegexp = re.compile(u'[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')
class InfosetFilter(object):
replacementRegexp = re.compile(r"U[\dA-F]{5,5}")
def __init__(self, replaceChars = None,
dropXmlnsLocalName = False,
dropXmlnsAttrNs = False,
preventDoubleDashComments = False,
preventDashAtCommentEnd = False,
replaceFormFeedCharacters = True):
self.dropXmlnsLocalName = dropXmlnsLocalName
self.dropXmlnsAttrNs = dropXmlnsAttrNs
self.preventDoubleDashComments = preventDoubleDashComments
self.preventDashAtCommentEnd = preventDashAtCommentEnd
self.replaceFormFeedCharacters = replaceFormFeedCharacters
self.replaceCache = {}
def coerceAttribute(self, name, namespace=None):
if self.dropXmlnsLocalName and name.startswith("xmlns:"):
#Need a datalosswarning here
return None
elif (self.dropXmlnsAttrNs and
namespace == "http://www.w3.org/2000/xmlns/"):
return None
else:
return self.toXmlName(name)
def coerceElement(self, name, namespace=None):
return self.toXmlName(name)
def coerceComment(self, data):
if self.preventDoubleDashComments:
while "--" in data:
data = data.replace("--", "- -")
return data
def coerceCharacters(self, data):
if self.replaceFormFeedCharacters:
data = data.replace("\x0C", " ")
#Other non-xml characters
return data
def toXmlName(self, name):
nameFirst = name[0]
nameRest = name[1:]
m = nonXmlNameFirstBMPRegexp.match(nameFirst)
if m:
nameFirstOutput = self.getReplacementCharacter(nameFirst)
else:
nameFirstOutput = nameFirst
nameRestOutput = nameRest
replaceChars = set(nonXmlNameBMPRegexp.findall(nameRest))
for char in replaceChars:
replacement = self.getReplacementCharacter(char)
nameRestOutput = nameRestOutput.replace(char, replacement)
return nameFirstOutput + nameRestOutput
def getReplacementCharacter(self, char):
if char in self.replaceCache:
replacement = self.replaceCache[char]
else:
replacement = self.escapeChar(char)
return replacement
def fromXmlName(self, name):
for item in set(self.replacementRegexp.findall(name)):
name = name.replace(item, self.unescapeChar(item))
return name
def escapeChar(self, char):
replacement = "U" + hex(ord(char))[2:].upper().rjust(5, "0")
self.replaceCache[char] = replacement
return replacement
def unescapeChar(self, charcode):
return unichr(int(charcode[1:], 16))

View File

@@ -1,258 +0,0 @@
import re
from xml.sax.saxutils import escape, unescape
from tokenizer import HTMLTokenizer
from constants import tokenTypes
class HTMLSanitizerMixin(object):
""" sanitization of XHTML+MathML+SVG and of inline style attributes."""
acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area',
'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button',
'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup',
'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn',
'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset',
'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1',
'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins',
'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter',
'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option',
'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select',
'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong',
'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot',
'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video']
mathml_elements = ['maction', 'math', 'merror', 'mfrac', 'mi',
'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', 'mphantom',
'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', 'msub',
'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder',
'munderover', 'none']
svg_elements = ['a', 'animate', 'animateColor', 'animateMotion',
'animateTransform', 'clipPath', 'circle', 'defs', 'desc', 'ellipse',
'font-face', 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern',
'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph',
'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect',
'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use']
acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis',
'background', 'balance', 'bgcolor', 'bgproperties', 'border',
'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding',
'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff',
'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color',
'cols', 'colspan', 'compact', 'contenteditable', 'controls', 'coords',
'data', 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default',
'delay', 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end',
'face', 'for', 'form', 'frame', 'galleryimg', 'gutter', 'headers',
'height', 'hidefocus', 'hidden', 'high', 'href', 'hreflang', 'hspace',
'icon', 'id', 'inputmode', 'ismap', 'keytype', 'label', 'leftspacing',
'lang', 'list', 'longdesc', 'loop', 'loopcount', 'loopend',
'loopstart', 'low', 'lowsrc', 'max', 'maxlength', 'media', 'method',
'min', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'open',
'optimum', 'pattern', 'ping', 'point-size', 'prompt', 'pqg',
'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min',
'replace', 'required', 'rev', 'rightspacing', 'rows', 'rowspan',
'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start',
'step', 'style', 'summary', 'suppress', 'tabindex', 'target',
'template', 'title', 'toppadding', 'type', 'unselectable', 'usemap',
'urn', 'valign', 'value', 'variable', 'volume', 'vspace', 'vrml',
'width', 'wrap', 'xml:lang']
mathml_attributes = ['actiontype', 'align', 'columnalign', 'columnalign',
'columnalign', 'columnlines', 'columnspacing', 'columnspan', 'depth',
'display', 'displaystyle', 'equalcolumns', 'equalrows', 'fence',
'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', 'lspace',
'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', 'maxsize',
'minsize', 'other', 'rowalign', 'rowalign', 'rowalign', 'rowlines',
'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection',
'separator', 'stretchy', 'width', 'width', 'xlink:href', 'xlink:show',
'xlink:type', 'xmlns', 'xmlns:xlink']
svg_attributes = ['accent-height', 'accumulate', 'additive', 'alphabetic',
'arabic-form', 'ascent', 'attributeName', 'attributeType',
'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
'class', 'clip-path', 'color', 'color-rendering', 'content', 'cx',
'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end', 'fill',
'fill-opacity', 'fill-rule', 'font-family', 'font-size',
'font-stretch', 'font-style', 'font-variant', 'font-weight', 'from',
'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits', 'hanging',
'height', 'horiz-adv-x', 'horiz-origin-x', 'id', 'ideographic', 'k',
'keyPoints', 'keySplines', 'keyTimes', 'lang', 'marker-end',
'marker-mid', 'marker-start', 'markerHeight', 'markerUnits',
'markerWidth', 'mathematical', 'max', 'min', 'name', 'offset',
'opacity', 'orient', 'origin', 'overline-position',
'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount',
'repeatDur', 'requiredExtensions', 'requiredFeatures', 'restart',
'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', 'stop-color',
'stop-opacity', 'strikethrough-position', 'strikethrough-thickness',
'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity',
'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to',
'transform', 'type', 'u1', 'u2', 'underline-position',
'underline-thickness', 'unicode', 'unicode-range', 'units-per-em',
'values', 'version', 'viewBox', 'visibility', 'width', 'widths', 'x',
'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole',
'xlink:href', 'xlink:role', 'xlink:show', 'xlink:title', 'xlink:type',
'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y',
'y1', 'y2', 'zoomAndPan']
attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc',
'xlink:href', 'xml:base']
svg_attr_val_allows_ref = ['clip-path', 'color-profile', 'cursor', 'fill',
'filter', 'marker', 'marker-start', 'marker-mid', 'marker-end',
'mask', 'stroke']
svg_allow_local_href = ['altGlyph', 'animate', 'animateColor',
'animateMotion', 'animateTransform', 'cursor', 'feImage', 'filter',
'linearGradient', 'pattern', 'radialGradient', 'textpath', 'tref',
'set', 'use']
acceptable_css_properties = ['azimuth', 'background-color',
'border-bottom-color', 'border-collapse', 'border-color',
'border-left-color', 'border-right-color', 'border-top-color', 'clear',
'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font',
'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight',
'height', 'letter-spacing', 'line-height', 'overflow', 'pause',
'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness',
'speak', 'speak-header', 'speak-numeral', 'speak-punctuation',
'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent',
'unicode-bidi', 'vertical-align', 'voice-family', 'volume',
'white-space', 'width']
acceptable_css_keywords = ['auto', 'aqua', 'black', 'block', 'blue',
'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed',
'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left',
'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive',
'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top',
'transparent', 'underline', 'white', 'yellow']
acceptable_svg_properties = [ 'fill', 'fill-opacity', 'fill-rule',
'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin',
'stroke-opacity']
acceptable_protocols = [ 'ed2k', 'ftp', 'http', 'https', 'irc',
'mailto', 'news', 'gopher', 'nntp', 'telnet', 'webcal',
'xmpp', 'callto', 'feed', 'urn', 'aim', 'rsync', 'tag',
'ssh', 'sftp', 'rtsp', 'afs' ]
# subclasses may define their own versions of these constants
allowed_elements = acceptable_elements + mathml_elements + svg_elements
allowed_attributes = acceptable_attributes + mathml_attributes + svg_attributes
allowed_css_properties = acceptable_css_properties
allowed_css_keywords = acceptable_css_keywords
allowed_svg_properties = acceptable_svg_properties
allowed_protocols = acceptable_protocols
# Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
# stripping out all # attributes not in ALLOWED_ATTRIBUTES. Style
# attributes are parsed, and a restricted set, # specified by
# ALLOWED_CSS_PROPERTIES and ALLOWED_CSS_KEYWORDS, are allowed through.
# attributes in ATTR_VAL_IS_URI are scanned, and only URI schemes specified
# in ALLOWED_PROTOCOLS are allowed.
#
# sanitize_html('<script> do_nasty_stuff() </script>')
# => &lt;script> do_nasty_stuff() &lt;/script>
# sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
# => <a>Click here for $100</a>
def sanitize_token(self, token):
# accommodate filters which use token_type differently
token_type = token["type"]
if token_type in tokenTypes.keys():
token_type = tokenTypes[token_type]
if token_type in (tokenTypes["StartTag"], tokenTypes["EndTag"],
tokenTypes["EmptyTag"]):
if token["name"] in self.allowed_elements:
if token.has_key("data"):
attrs = dict([(name,val) for name,val in
token["data"][::-1]
if name in self.allowed_attributes])
for attr in self.attr_val_is_uri:
if not attrs.has_key(attr):
continue
val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
unescape(attrs[attr])).lower()
#remove replacement characters from unescaped characters
val_unescaped = val_unescaped.replace(u"\ufffd", "")
if (re.match("^[a-z0-9][-+.a-z0-9]*:",val_unescaped) and
(val_unescaped.split(':')[0] not in
self.allowed_protocols)):
del attrs[attr]
for attr in self.svg_attr_val_allows_ref:
if attr in attrs:
attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
' ',
unescape(attrs[attr]))
if (token["name"] in self.svg_allow_local_href and
'xlink:href' in attrs and re.search('^\s*[^#\s].*',
attrs['xlink:href'])):
del attrs['xlink:href']
if attrs.has_key('style'):
attrs['style'] = self.sanitize_css(attrs['style'])
token["data"] = [[name,val] for name,val in attrs.items()]
return token
else:
if token_type == tokenTypes["EndTag"]:
token["data"] = "</%s>" % token["name"]
elif token["data"]:
attrs = ''.join([' %s="%s"' % (k,escape(v)) for k,v in token["data"]])
token["data"] = "<%s%s>" % (token["name"],attrs)
else:
token["data"] = "<%s>" % token["name"]
if token.get("selfClosing"):
token["data"]=token["data"][:-1] + "/>"
if token["type"] in tokenTypes.keys():
token["type"] = "Characters"
else:
token["type"] = tokenTypes["Characters"]
del token["name"]
return token
elif token_type == tokenTypes["Comment"]:
pass
else:
return token
def sanitize_css(self, style):
# disallow urls
style=re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ',style)
# gauntlet
if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): return ''
if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style): return ''
clean = []
for prop,value in re.findall("([-\w]+)\s*:\s*([^:;]*)",style):
if not value: continue
if prop.lower() in self.allowed_css_properties:
clean.append(prop + ': ' + value + ';')
elif prop.split('-')[0].lower() in ['background','border','margin',
'padding']:
for keyword in value.split():
if not keyword in self.acceptable_css_keywords and \
not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$",keyword):
break
else:
clean.append(prop + ': ' + value + ';')
elif prop.lower() in self.allowed_svg_properties:
clean.append(prop + ': ' + value + ';')
return ' '.join(clean)
class HTMLSanitizer(HTMLTokenizer, HTMLSanitizerMixin):
def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
lowercaseElementName=False, lowercaseAttrName=False, parser=None):
#Change case matching defaults as we only output lowercase html anyway
#This solution doesn't seem ideal...
HTMLTokenizer.__init__(self, stream, encoding, parseMeta, useChardet,
lowercaseElementName, lowercaseAttrName, parser=parser)
def __iter__(self):
for token in HTMLTokenizer.__iter__(self):
token = self.sanitize_token(token)
if token:
yield token

View File

@@ -1,17 +0,0 @@
from html5lib import treewalkers
from htmlserializer import HTMLSerializer
from xhtmlserializer import XHTMLSerializer
def serialize(input, tree="simpletree", format="html", encoding=None,
**serializer_opts):
# XXX: Should we cache this?
walker = treewalkers.getTreeWalker(tree)
if format == "html":
s = HTMLSerializer(**serializer_opts)
elif format == "xhtml":
s = XHTMLSerializer(**serializer_opts)
else:
raise ValueError, "type must be either html or xhtml"
return s.render(walker(input), encoding)

View File

@@ -1,9 +0,0 @@
from htmlserializer import HTMLSerializer
class XHTMLSerializer(HTMLSerializer):
quote_attr_values = True
minimize_boolean_attributes = False
use_trailing_solidus = True
escape_lt_in_attrs = True
omit_optional_tags = False
escape_rcdata = True

View File

@@ -1,12 +0,0 @@
import sys
import os
parent_path = os.path.abspath(os.path.join(os.path.split(__file__)[0], ".."))
if not parent_path in sys.path:
sys.path.insert(0, parent_path)
del parent_path
from runtests import buildTestSuite
import support

View File

@@ -1,37 +0,0 @@
import sys
import os
if __name__ == '__main__':
#Allow us to import from the src directory
os.chdir(os.path.split(os.path.abspath(__file__))[0])
sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, "src")))
from tokenizer import HTMLTokenizer
class HTMLParser(object):
""" Fake parser to test tokenizer output """
def parse(self, stream, output=True):
tokenizer = HTMLTokenizer(stream)
for token in tokenizer:
if output:
print token
if __name__ == "__main__":
x = HTMLParser()
if len(sys.argv) > 1:
if len(sys.argv) > 2:
import hotshot, hotshot.stats
prof = hotshot.Profile('stats.prof')
prof.runcall(x.parse, sys.argv[1], False)
prof.close()
stats = hotshot.stats.load('stats.prof')
stats.strip_dirs()
stats.sort_stats('time')
stats.print_stats()
else:
x.parse(sys.argv[1])
else:
print """Usage: python mockParser.py filename [stats]
If stats is specified the hotshots profiler will run and output the
stats instead.
"""

View File

@@ -1,27 +0,0 @@
import sys
import os
import glob
import unittest
#Allow us to import the parent module
os.chdir(os.path.split(os.path.abspath(__file__))[0])
sys.path.insert(0, os.path.abspath(os.curdir))
sys.path.insert(0, os.path.abspath(os.pardir))
sys.path.insert(0, os.path.join(os.path.abspath(os.pardir), "src"))
def buildTestSuite():
suite = unittest.TestSuite()
for testcase in glob.glob('test_*.py'):
if testcase in ("test_tokenizer.py", "test_parser.py", "test_parser2.py"):
module = os.path.splitext(testcase)[0]
suite.addTest(__import__(module).buildTestSuite())
return suite
def main():
results = unittest.TextTestRunner().run(buildTestSuite())
return results
if __name__ == "__main__":
results = main()
if not results.wasSuccessful():
sys.exit(1)

View File

@@ -1,20 +0,0 @@
import sys
import os
import glob
import unittest
def buildTestSuite():
suite = unittest.TestSuite()
for testcase in glob.glob('test_*.py'):
module = os.path.splitext(testcase)[0]
suite.addTest(__import__(module).buildTestSuite())
return suite
def main():
results = unittest.TextTestRunner().run(buildTestSuite())
return results
if __name__ == "__main__":
results = main()
if not results.wasSuccessful():
sys.exit(1)

View File

@@ -1,127 +0,0 @@
import os
import sys
import codecs
import glob
base_path = os.path.split(__file__)[0]
if os.path.exists(os.path.join(base_path, 'testdata')):
#release
test_dir = os.path.join(base_path, 'testdata')
else:
#development
test_dir = os.path.abspath(
os.path.join(base_path,
os.path.pardir, os.path.pardir,
os.path.pardir, 'testdata'))
assert os.path.exists(test_dir), "Test data not found"
#import the development html5lib
sys.path.insert(0, os.path.abspath(os.path.join(base_path,
os.path.pardir,
os.path.pardir)))
import html5lib
from html5lib import html5parser, treebuilders
del base_path
#Build a dict of avaliable trees
treeTypes = {"simpletree":treebuilders.getTreeBuilder("simpletree"),
"DOM":treebuilders.getTreeBuilder("dom")}
#Try whatever etree implementations are avaliable from a list that are
#"supposed" to work
try:
import xml.etree.ElementTree as ElementTree
treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
except ImportError:
try:
import elementtree.ElementTree as ElementTree
treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
except ImportError:
pass
try:
import xml.etree.cElementTree as cElementTree
treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
except ImportError:
try:
import cElementTree
treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
except ImportError:
pass
try:
import lxml.etree as lxml
treeTypes['lxml'] = treebuilders.getTreeBuilder("etree", lxml, fullTree=True)
except ImportError:
pass
try:
import BeautifulSoup
treeTypes["beautifulsoup"] = treebuilders.getTreeBuilder("beautifulsoup", fullTree=True)
except ImportError:
pass
def html5lib_test_files(subdirectory, files='*.dat'):
return glob.glob(os.path.join(test_dir,subdirectory,files))
class DefaultDict(dict):
def __init__(self, default, *args, **kwargs):
self.default = default
dict.__init__(self, *args, **kwargs)
def __getitem__(self, key):
return dict.get(self, key, self.default)
class TestData(object):
def __init__(self, filename, newTestHeading="data"):
self.f = codecs.open(filename, encoding="utf8")
self.newTestHeading = newTestHeading
def __iter__(self):
data = DefaultDict(None)
key=None
for line in self.f:
heading = self.isSectionHeading(line)
if heading:
if data and heading == self.newTestHeading:
#Remove trailing newline
data[key] = data[key][:-1]
yield self.normaliseOutput(data)
data = DefaultDict(None)
key = heading
data[key]=""
elif key is not None:
data[key] += line
if data:
yield self.normaliseOutput(data)
def isSectionHeading(self, line):
"""If the current heading is a test section heading return the heading,
otherwise return False"""
if line.startswith("#"):
return line[1:].strip()
else:
return False
def normaliseOutput(self, data):
#Remove trailing newlines
for key,value in data.iteritems():
if value.endswith("\n"):
data[key] = value[:-1]
return data
def convert(stripChars):
def convertData(data):
"""convert the output of str(document) to the format used in the testcases"""
data = data.split("\n")
rv = []
for line in data:
if line.startswith("|"):
rv.append(line[stripChars:])
else:
rv.append(line)
return "\n".join(rv)
return convertData
convertExpected = convert(2)

View File

@@ -1,54 +0,0 @@
import os
import unittest
from support import html5lib_test_files, TestData, test_dir
from html5lib import HTMLParser, inputstream
import re, unittest
class Html5EncodingTestCase(unittest.TestCase):
def test_codec_name(self):
self.assertEquals(inputstream.codecName("utf-8"), "utf-8")
self.assertEquals(inputstream.codecName("utf8"), "utf-8")
self.assertEquals(inputstream.codecName(" utf8 "), "utf-8")
self.assertEquals(inputstream.codecName("ISO_8859--1"), "windows-1252")
def buildTestSuite():
for filename in html5lib_test_files("encoding"):
test_name = os.path.basename(filename).replace('.dat',''). \
replace('-','')
tests = TestData(filename, "data")
for idx, test in enumerate(tests):
def encodingTest(self, data=test['data'],
encoding=test['encoding']):
p = HTMLParser()
t = p.parse(data, useChardet=False)
errorMessage = ("Input:\n%s\nExpected:\n%s\nRecieved\n%s\n"%
(data, repr(encoding.lower()),
repr(p.tokenizer.stream.charEncoding)))
self.assertEquals(encoding.lower(),
p.tokenizer.stream.charEncoding[0],
errorMessage)
setattr(Html5EncodingTestCase, 'test_%s_%d' % (test_name, idx+1),
encodingTest)
try:
import chardet
def test_chardet(self):
data = open(os.path.join(test_dir, "encoding" , "chardet", "test_big5.txt")).read()
encoding = inputstream.HTMLInputStream(data).charEncoding
assert encoding[0].lower() == "big5"
setattr(Html5EncodingTestCase, 'test_chardet', test_chardet)
except ImportError:
print "chardet not found, skipping chardet tests"
return unittest.defaultTestLoader.loadTestsFromName(__name__)
def main():
buildTestSuite()
unittest.main()
if __name__ == "__main__":
main()

View File

@@ -1,296 +0,0 @@
import sys
import unittest
from html5lib.filters.formfiller import SimpleFilter
class FieldStorage(dict):
def getlist(self, name):
l = self[name]
if isinstance(l, list):
return l
elif isinstance(l, tuple) or hasattr(l, '__iter__'):
return list(l)
return [l]
class TestCase(unittest.TestCase):
def runTest(self, input, formdata, expected):
try:
output = list(SimpleFilter(input, formdata))
except NotImplementedError, nie:
# Amnesty for those that confess...
print >>sys.stderr, "Not implemented:", str(nie)
else:
errorMsg = "\n".join(["\n\nInput:", str(input),
"\nForm data:", str(formdata),
"\nExpected:", str(expected),
"\nReceived:", str(output)])
self.assertEquals(output, expected, errorMsg)
def testSingleTextInputWithValue(self):
self.runTest(
[{"type": u"EmptyTag", "name": u"input",
"data": [(u"type", u"text"), (u"name", u"foo"), (u"value", u"quux")]}],
FieldStorage({"foo": "bar"}),
[{"type": u"EmptyTag", "name": u"input",
"data": [(u"type", u"text"), (u"name", u"foo"), (u"value", u"bar")]}])
def testSingleTextInputWithoutValue(self):
self.runTest(
[{"type": u"EmptyTag", "name": u"input",
"data": [(u"type", u"text"), (u"name", u"foo")]}],
FieldStorage({"foo": "bar"}),
[{"type": u"EmptyTag", "name": u"input",
"data": [(u"type", u"text"), (u"name", u"foo"), (u"value", u"bar")]}])
def testSingleCheckbox(self):
self.runTest(
[{"type": u"EmptyTag", "name": u"input",
"data": [(u"type", u"checkbox"), (u"name", u"foo"), (u"value", u"bar")]}],
FieldStorage({"foo": "bar"}),
[{"type": u"EmptyTag", "name": u"input",
"data": [(u"type", u"checkbox"), (u"name", u"foo"), (u"value", u"bar"), (u"checked", u"")]}])
def testSingleCheckboxShouldBeUnchecked(self):
self.runTest(
[{"type": u"EmptyTag", "name": u"input",
"data": [(u"type", u"checkbox"), (u"name", u"foo"), (u"value", u"quux")]}],
FieldStorage({"foo": "bar"}),
[{"type": u"EmptyTag", "name": u"input",
"data": [(u"type", u"checkbox"), (u"name", u"foo"), (u"value", u"quux")]}])
def testSingleCheckboxCheckedByDefault(self):
self.runTest(
[{"type": u"EmptyTag", "name": u"input",
"data": [(u"type", u"checkbox"), (u"name", u"foo"), (u"value", u"bar"), (u"checked", u"")]}],
FieldStorage({"foo": "bar"}),
[{"type": u"EmptyTag", "name": u"input",
"data": [(u"type", u"checkbox"), (u"name", u"foo"), (u"value", u"bar"), (u"checked", u"")]}])
def testSingleCheckboxCheckedByDefaultShouldBeUnchecked(self):
self.runTest(
[{"type": u"EmptyTag", "name": u"input",
"data": [(u"type", u"checkbox"), (u"name", u"foo"), (u"value", u"quux"), (u"checked", u"")]}],
FieldStorage({"foo": "bar"}),
[{"type": u"EmptyTag", "name": u"input",
"data": [(u"type", u"checkbox"), (u"name", u"foo"), (u"value", u"quux")]}])
def testSingleTextareaWithValue(self):
self.runTest(
[{"type": u"StartTag", "name": u"textarea", "data": [(u"name", u"foo")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"textarea", "data": []}],
FieldStorage({"foo": "bar"}),
[{"type": u"StartTag", "name": u"textarea", "data": [(u"name", u"foo")]},
{"type": u"Characters", "data": u"bar"},
{"type": u"EndTag", "name": u"textarea", "data": []}])
def testSingleTextareaWithoutValue(self):
self.runTest(
[{"type": u"StartTag", "name": u"textarea", "data": [(u"name", u"foo")]},
{"type": u"EndTag", "name": u"textarea", "data": []}],
FieldStorage({"foo": "bar"}),
[{"type": u"StartTag", "name": u"textarea", "data": [(u"name", u"foo")]},
{"type": u"Characters", "data": u"bar"},
{"type": u"EndTag", "name": u"textarea", "data": []}])
def testSingleSelectWithValue(self):
self.runTest(
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}],
FieldStorage({"foo": "bar"}),
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar"), (u"selected", u"")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}])
def testSingleSelectWithValueShouldBeUnselected(self):
self.runTest(
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}],
FieldStorage({"foo": "quux"}),
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}])
def testSingleSelectWithoutValue(self):
self.runTest(
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": []},
{"type": u"Characters", "data": u"bar"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}],
FieldStorage({"foo": "bar"}),
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": [(u"selected", u"")]},
{"type": u"Characters", "data": u"bar"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}])
def testSingleSelectWithoutValueShouldBeUnselected(self):
self.runTest(
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": []},
{"type": u"Characters", "data": u"bar"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}],
FieldStorage({"foo": "quux"}),
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": []},
{"type": u"Characters", "data": u"bar"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}])
def testSingleSelectTwoOptionsWithValue(self):
self.runTest(
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"quux")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}],
FieldStorage({"foo": "bar"}),
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar"), (u"selected", u"")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"quux")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}])
def testSingleSelectTwoOptionsWithValueShouldBeUnselected(self):
self.runTest(
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"baz")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}],
FieldStorage({"foo": "quux"}),
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"baz")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}])
def testSingleSelectTwoOptionsWithoutValue(self):
self.runTest(
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": []},
{"type": u"Characters", "data": u"bar"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"StartTag", "name": u"option", "data": []},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}],
FieldStorage({"foo": "bar"}),
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": [(u"selected", u"")]},
{"type": u"Characters", "data": u"bar"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"StartTag", "name": u"option", "data": []},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}])
def testSingleSelectTwoOptionsWithoutValueShouldBeUnselected(self):
self.runTest(
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": []},
{"type": u"Characters", "data": u"bar"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"StartTag", "name": u"option", "data": []},
{"type": u"Characters", "data": u"baz"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}],
FieldStorage({"foo": "quux"}),
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": []},
{"type": u"Characters", "data": u"bar"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"StartTag", "name": u"option", "data": []},
{"type": u"Characters", "data": u"baz"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}])
def testSingleSelectMultiple(self):
self.runTest(
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo"), (u"multiple", u"")]},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"quux")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}],
FieldStorage({"foo": ["bar", "quux"]}),
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo"), (u"multiple", u"")]},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar"), (u"selected", u"")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"quux"), (u"selected", u"")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}])
def testTwoSelect(self):
self.runTest(
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"quux")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []},
{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"quux")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}],
FieldStorage({"foo": ["bar", "quux"]}),
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar"), (u"selected", u"")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"quux")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []},
{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"quux"), (u"selected", u"")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}])
def buildTestSuite():
return unittest.defaultTestLoader.loadTestsFromName(__name__)
def main():
buildTestSuite()
unittest.main()
if __name__ == "__main__":
main()

View File

@@ -1,140 +0,0 @@
import os
import sys
import traceback
import StringIO
import warnings
import re
warnings.simplefilter("error")
from support import html5lib_test_files as data_files
from support import TestData, convert, convertExpected
import html5lib
from html5lib import html5parser, treebuilders, constants
treeTypes = {"simpletree":treebuilders.getTreeBuilder("simpletree"),
"DOM":treebuilders.getTreeBuilder("dom")}
#Try whatever etree implementations are avaliable from a list that are
#"supposed" to work
try:
import xml.etree.ElementTree as ElementTree
treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
except ImportError:
try:
import elementtree.ElementTree as ElementTree
treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
except ImportError:
pass
try:
import xml.etree.cElementTree as cElementTree
treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
except ImportError:
try:
import cElementTree
treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
except ImportError:
pass
try:
try:
import lxml.html as lxml
except ImportError:
import lxml.etree as lxml
treeTypes['lxml'] = treebuilders.getTreeBuilder("lxml", lxml, fullTree=True)
except ImportError:
pass
try:
import BeautifulSoup
treeTypes["beautifulsoup"] = treebuilders.getTreeBuilder("beautifulsoup", fullTree=True)
except ImportError:
pass
#Try whatever dom implementations are avaliable from a list that are
#"supposed" to work
try:
import pxdom
treeTypes["pxdom"] = treebuilders.getTreeBuilder("dom", pxdom)
except ImportError:
pass
#Run the parse error checks
checkParseErrors = False
#XXX - There should just be one function here but for some reason the testcase
#format differs from the treedump format by a single space character
def convertTreeDump(data):
return "\n".join(convert(3)(data).split("\n")[1:])
namespaceExpected = re.compile(r"^(\s*)<(\S+)>", re.M).sub
def runParserTest(innerHTML, input, expected, errors, treeClass,
namespaceHTMLElements):
#XXX - move this out into the setup function
#concatenate all consecutive character tokens into a single token
try:
p = html5parser.HTMLParser(tree = treeClass,
namespaceHTMLElements=namespaceHTMLElements)
except constants.DataLossWarning:
return
try:
if innerHTML:
document = p.parseFragment(input, innerHTML)
else:
try:
document = p.parse(input)
except constants.DataLossWarning:
return
except:
errorMsg = u"\n".join([u"\n\nInput:", input, u"\nExpected:", expected,
u"\nTraceback:", traceback.format_exc()])
assert False, errorMsg.encode("utf8")
output = convertTreeDump(p.tree.testSerializer(document))
expected = convertExpected(expected)
if namespaceHTMLElements:
expected = namespaceExpected(r"\1<html \2>", expected)
errorMsg = u"\n".join([u"\n\nInput:", input, u"\nExpected:", expected,
u"\nReceived:", output])
assert expected == output, errorMsg.encode("utf8")
errStr = [u"Line: %i Col: %i %s"%(line, col,
constants.E[errorcode] % datavars if isinstance(datavars, dict) else (datavars,)) for
((line,col), errorcode, datavars) in p.errors]
errorMsg2 = u"\n".join([u"\n\nInput:", input,
u"\nExpected errors (" + str(len(errors)) + u"):\n" + u"\n".join(errors),
u"\nActual errors (" + str(len(p.errors)) + u"):\n" + u"\n".join(errStr)])
if checkParseErrors:
assert len(p.errors) == len(errors), errorMsg2.encode("utf-8")
def test_parser():
sys.stderr.write('Testing tree builders '+ " ".join(treeTypes.keys()) + "\n")
files = data_files('tree-construction')
for filename in files:
testName = os.path.basename(filename).replace(".dat","")
tests = TestData(filename, "data")
for index, test in enumerate(tests):
input, errors, innerHTML, expected = [test[key] for key in
'data', 'errors',
'document-fragment',
'document']
if errors:
errors = errors.split("\n")
for treeName, treeCls in treeTypes.iteritems():
for namespaceHTMLElements in (True, False):
print input
yield (runParserTest, innerHTML, input, expected, errors, treeCls,
namespaceHTMLElements)
break

View File

@@ -1,39 +0,0 @@
import support
from html5lib import html5parser
from html5lib.constants import namespaces
from html5lib.treebuilders import dom
import unittest
# tests that aren't autogenerated from text files
class MoreParserTests(unittest.TestCase):
def test_assertDoctypeCloneable(self):
parser = html5parser.HTMLParser(tree=dom.TreeBuilder)
doc = parser.parse('<!DOCTYPE HTML>')
self.assert_(doc.cloneNode(True))
def test_line_counter(self):
# http://groups.google.com/group/html5lib-discuss/browse_frm/thread/f4f00e4a2f26d5c0
parser = html5parser.HTMLParser(tree=dom.TreeBuilder)
parser.parse("<pre>\nx\n&gt;\n</pre>")
def test_namespace_html_elements_0(self):
parser = html5parser.HTMLParser(namespaceHTMLElements=True)
doc = parser.parse("<html></html>")
self.assert_(doc.childNodes[0].namespace == namespaces["html"])
def test_namespace_html_elements_1(self):
parser = html5parser.HTMLParser(namespaceHTMLElements=False)
doc = parser.parse("<html></html>")
self.assert_(doc.childNodes[0].namespace == None)
def buildTestSuite():
return unittest.defaultTestLoader.loadTestsFromName(__name__)
def main():
buildTestSuite()
unittest.main()
if __name__ == '__main__':
main()

View File

@@ -1,76 +0,0 @@
import os
import sys
import unittest
try:
import json
except ImportError:
import simplejson as json
from html5lib import html5parser, sanitizer, constants
def runSanitizerTest(name, expected, input):
expected = ''.join([token.toxml() for token in html5parser.HTMLParser().
parseFragment(expected).childNodes])
expected = json.loads(json.dumps(expected))
assert expected == sanitize_html(input)
def sanitize_html(stream):
return ''.join([token.toxml() for token in
html5parser.HTMLParser(tokenizer=sanitizer.HTMLSanitizer).
parseFragment(stream).childNodes])
def test_should_handle_astral_plane_characters():
assert u"<p>\U0001d4b5 \U0001d538</p>" == sanitize_html("<p>&#x1d4b5; &#x1d538;</p>")
def test_sanitizer():
for tag_name in sanitizer.HTMLSanitizer.allowed_elements:
if tag_name in ['caption', 'col', 'colgroup', 'optgroup', 'option', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr']:
continue ### TODO
if tag_name != tag_name.lower():
continue ### TODO
if tag_name == 'image':
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
"<img title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz",
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
elif tag_name == 'br':
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
"<br title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz<br/>",
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
elif tag_name in constants.voidElements:
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
"<%s title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz" % tag_name,
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
else:
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
"<%s title=\"1\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</%s>" % (tag_name,tag_name),
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
for tag_name in sanitizer.HTMLSanitizer.allowed_elements:
tag_name = tag_name.upper()
yield (runSanitizerTest, "test_should_forbid_%s_tag" % tag_name,
"&lt;%s title=\"1\"&gt;foo &lt;bad&gt;bar&lt;/bad&gt; baz&lt;/%s&gt;" % (tag_name,tag_name),
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes:
if attribute_name != attribute_name.lower(): continue ### TODO
if attribute_name == 'style': continue
yield (runSanitizerTest, "test_should_allow_%s_attribute" % attribute_name,
"<p %s=\"foo\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>" % attribute_name,
"<p %s='foo'>foo <bad>bar</bad> baz</p>" % attribute_name)
for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes:
attribute_name = attribute_name.upper()
yield (runSanitizerTest, "test_should_forbid_%s_attribute" % attribute_name,
"<p>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>",
"<p %s='display: none;'>foo <bad>bar</bad> baz</p>" % attribute_name)
for protocol in sanitizer.HTMLSanitizer.allowed_protocols:
yield (runSanitizerTest, "test_should_allow_%s_uris" % protocol,
"<a href=\"%s\">foo</a>" % protocol,
"""<a href="%s">foo</a>""" % protocol)
for protocol in sanitizer.HTMLSanitizer.allowed_protocols:
yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol,
"<a href=\"%s\">foo</a>" % protocol,
"""<a href="%s">foo</a>""" % protocol)

View File

@@ -1,180 +0,0 @@
import os
import unittest
from support import html5lib_test_files
try:
import json
except ImportError:
import simplejson as json
import html5lib
from html5lib import html5parser, serializer, constants
from html5lib.treewalkers._base import TreeWalker
optionals_loaded = []
try:
from lxml import etree
optionals_loaded.append("lxml")
except ImportError:
pass
default_namespace = constants.namespaces["html"]
class JsonWalker(TreeWalker):
def __iter__(self):
for token in self.tree:
type = token[0]
if type == "StartTag":
if len(token) == 4:
namespace, name, attrib = token[1:4]
else:
namespace = default_namespace
name, attrib = token[1:3]
yield self.startTag(namespace, name, self._convertAttrib(attrib))
elif type == "EndTag":
if len(token) == 3:
namespace, name = token[1:3]
else:
namespace = default_namespace
name = token[1]
yield self.endTag(namespace, name)
elif type == "EmptyTag":
if len(token) == 4:
namespace, name, attrib = token[1:]
else:
namespace = default_namespace
name, attrib = token[1:]
for token in self.emptyTag(namespace, name, self._convertAttrib(attrib)):
yield token
elif type == "Comment":
yield self.comment(token[1])
elif type in ("Characters", "SpaceCharacters"):
for token in self.text(token[1]):
yield token
elif type == "Doctype":
if len(token) == 4:
yield self.doctype(token[1], token[2], token[3])
elif len(token) == 3:
yield self.doctype(token[1], token[2])
else:
yield self.doctype(token[1])
else:
raise ValueError("Unknown token type: " + type)
def _convertAttrib(self, attribs):
"""html5lib tree-walkers use a dict of (namespace, name): value for
attributes, but JSON cannot represent this. Convert from the format
in the serializer tests (a list of dicts with "namespace", "name",
and "value" as keys) to html5lib's tree-walker format."""
attrs = {}
for attrib in attribs:
name = (attrib["namespace"], attrib["name"])
assert(name not in attrs)
attrs[name] = attrib["value"]
return attrs
def serialize_html(input, options):
options = dict([(str(k),v) for k,v in options.iteritems()])
return serializer.HTMLSerializer(**options).render(JsonWalker(input),options.get("encoding",None))
def serialize_xhtml(input, options):
options = dict([(str(k),v) for k,v in options.iteritems()])
return serializer.XHTMLSerializer(**options).render(JsonWalker(input),options.get("encoding",None))
def make_test(input, expected, xhtml, options):
result = serialize_html(input, options)
if len(expected) == 1:
assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions\nxhtml:False\n%s"%(expected[0], result, str(options))
elif result not in expected:
assert False, "Expected: %s, Received: %s" % (expected, result)
if not xhtml:
return
result = serialize_xhtml(input, options)
if len(xhtml) == 1:
assert xhtml[0] == result, "Expected:\n%s\nActual:\n%s\nOptions\nxhtml:True\n%s"%(xhtml[0], result, str(options))
elif result not in xhtml:
assert False, "Expected: %s, Received: %s" % (xhtml, result)
class EncodingTestCase(unittest.TestCase):
def throwsWithLatin1(self, input):
self.assertRaises(UnicodeEncodeError, serialize_html, input, {"encoding": "iso-8859-1"})
def testDoctypeName(self):
self.throwsWithLatin1([["Doctype", u"\u0101"]])
def testDoctypePublicId(self):
self.throwsWithLatin1([["Doctype", u"potato", u"\u0101"]])
def testDoctypeSystemId(self):
self.throwsWithLatin1([["Doctype", u"potato", u"potato", u"\u0101"]])
def testCdataCharacters(self):
self.assertEquals("<style>&amacr;", serialize_html([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}],
["Characters", u"\u0101"]],
{"encoding": "iso-8859-1"}))
def testCharacters(self):
self.assertEquals("&amacr;", serialize_html([["Characters", u"\u0101"]],
{"encoding": "iso-8859-1"}))
def testStartTagName(self):
self.throwsWithLatin1([["StartTag", u"http://www.w3.org/1999/xhtml", u"\u0101", []]])
def testEmptyTagName(self):
self.throwsWithLatin1([["EmptyTag", u"http://www.w3.org/1999/xhtml", u"\u0101", []]])
def testAttributeName(self):
self.throwsWithLatin1([["StartTag", u"http://www.w3.org/1999/xhtml", u"span", [{"namespace": None, "name": u"\u0101", "value": u"potato"}]]])
def testAttributeValue(self):
self.assertEquals("<span potato=&amacr;>", serialize_html([["StartTag", u"http://www.w3.org/1999/xhtml", u"span",
[{"namespace": None, "name": u"potato", "value": u"\u0101"}]]],
{"encoding": "iso-8859-1"}))
def testEndTagName(self):
self.throwsWithLatin1([["EndTag", u"http://www.w3.org/1999/xhtml", u"\u0101"]])
def testComment(self):
self.throwsWithLatin1([["Comment", u"\u0101"]])
if "lxml" in optionals_loaded:
class LxmlTestCase(unittest.TestCase):
def setUp(self):
self.parser = etree.XMLParser(resolve_entities=False)
self.treewalker = html5lib.getTreeWalker("lxml")
self.serializer = serializer.HTMLSerializer()
def testEntityReplacement(self):
doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>"""
tree = etree.fromstring(doc, parser = self.parser).getroottree()
result = serializer.serialize(tree, tree="lxml", omit_optional_tags=False)
self.assertEquals(u"""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>\u03B2</html>""", result)
def testEntityXML(self):
doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>"""
tree = etree.fromstring(doc, parser = self.parser).getroottree()
result = serializer.serialize(tree, tree="lxml", omit_optional_tags=False)
self.assertEquals(u"""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>""", result)
def testEntityNoResolve(self):
doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>"""
tree = etree.fromstring(doc, parser = self.parser).getroottree()
result = serializer.serialize(tree, tree="lxml", omit_optional_tags=False,
resolve_entities=False)
self.assertEquals(u"""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>""", result)
def test_serializer():
for filename in html5lib_test_files('serializer', '*.test'):
tests = json.load(file(filename))
test_name = os.path.basename(filename).replace('.test','')
for index, test in enumerate(tests['tests']):
xhtml = test.get("xhtml", test["expected"])
if test_name == 'optionaltags':
xhtml = None
yield make_test, test["input"], test["expected"], xhtml, test.get("options", {})

View File

@@ -1,97 +0,0 @@
import support
import unittest, codecs
from html5lib.inputstream import HTMLInputStream
class HTMLInputStreamShortChunk(HTMLInputStream):
_defaultChunkSize = 2
class HTMLInputStreamTest(unittest.TestCase):
def test_char_ascii(self):
stream = HTMLInputStream("'", encoding='ascii')
self.assertEquals(stream.charEncoding[0], 'ascii')
self.assertEquals(stream.char(), "'")
def test_char_null(self):
stream = HTMLInputStream("\x00")
self.assertEquals(stream.char(), u'\ufffd')
def test_char_utf8(self):
stream = HTMLInputStream(u'\u2018'.encode('utf-8'), encoding='utf-8')
self.assertEquals(stream.charEncoding[0], 'utf-8')
self.assertEquals(stream.char(), u'\u2018')
def test_char_win1252(self):
stream = HTMLInputStream(u"\xa9\xf1\u2019".encode('windows-1252'))
self.assertEquals(stream.charEncoding[0], 'windows-1252')
self.assertEquals(stream.char(), u"\xa9")
self.assertEquals(stream.char(), u"\xf1")
self.assertEquals(stream.char(), u"\u2019")
def test_bom(self):
stream = HTMLInputStream(codecs.BOM_UTF8 + "'")
self.assertEquals(stream.charEncoding[0], 'utf-8')
self.assertEquals(stream.char(), "'")
def test_utf_16(self):
stream = HTMLInputStream((' '*1025).encode('utf-16'))
self.assert_(stream.charEncoding[0] in ['utf-16-le', 'utf-16-be'], stream.charEncoding)
self.assertEquals(len(stream.charsUntil(' ', True)), 1025)
def test_newlines(self):
stream = HTMLInputStreamShortChunk(codecs.BOM_UTF8 + "a\nbb\r\nccc\rddddxe")
self.assertEquals(stream.position(), (1, 0))
self.assertEquals(stream.charsUntil('c'), u"a\nbb\n")
self.assertEquals(stream.position(), (3, 0))
self.assertEquals(stream.charsUntil('x'), u"ccc\ndddd")
self.assertEquals(stream.position(), (4, 4))
self.assertEquals(stream.charsUntil('e'), u"x")
self.assertEquals(stream.position(), (4, 5))
def test_newlines2(self):
size = HTMLInputStream._defaultChunkSize
stream = HTMLInputStream("\r" * size + "\n")
self.assertEquals(stream.charsUntil('x'), "\n" * size)
def test_position(self):
stream = HTMLInputStreamShortChunk(codecs.BOM_UTF8 + "a\nbb\nccc\nddde\nf\ngh")
self.assertEquals(stream.position(), (1, 0))
self.assertEquals(stream.charsUntil('c'), u"a\nbb\n")
self.assertEquals(stream.position(), (3, 0))
stream.unget(u"\n")
self.assertEquals(stream.position(), (2, 2))
self.assertEquals(stream.charsUntil('c'), u"\n")
self.assertEquals(stream.position(), (3, 0))
stream.unget(u"\n")
self.assertEquals(stream.position(), (2, 2))
self.assertEquals(stream.char(), u"\n")
self.assertEquals(stream.position(), (3, 0))
self.assertEquals(stream.charsUntil('e'), u"ccc\nddd")
self.assertEquals(stream.position(), (4, 3))
self.assertEquals(stream.charsUntil('h'), u"e\nf\ng")
self.assertEquals(stream.position(), (6, 1))
def test_position2(self):
stream = HTMLInputStreamShortChunk("abc\nd")
self.assertEquals(stream.position(), (1, 0))
self.assertEquals(stream.char(), u"a")
self.assertEquals(stream.position(), (1, 1))
self.assertEquals(stream.char(), u"b")
self.assertEquals(stream.position(), (1, 2))
self.assertEquals(stream.char(), u"c")
self.assertEquals(stream.position(), (1, 3))
self.assertEquals(stream.char(), u"\n")
self.assertEquals(stream.position(), (2, 0))
self.assertEquals(stream.char(), u"d")
self.assertEquals(stream.position(), (2, 1))
def buildTestSuite():
return unittest.defaultTestLoader.loadTestsFromName(__name__)
def main():
buildTestSuite()
unittest.main()
if __name__ == '__main__':
main()

View File

@@ -1,193 +0,0 @@
import sys
import os
import unittest
import cStringIO
import warnings
import re
try:
import json
except ImportError:
import simplejson as json
from support import html5lib_test_files
from html5lib.tokenizer import HTMLTokenizer
from html5lib import constants
class TokenizerTestParser(object):
def __init__(self, initialState, lastStartTag=None):
self.tokenizer = HTMLTokenizer
self._state = initialState
self._lastStartTag = lastStartTag
def parse(self, stream, encoding=None, innerHTML=False):
tokenizer = self.tokenizer(stream, encoding)
self.outputTokens = []
tokenizer.state = getattr(tokenizer, self._state)
if self._lastStartTag is not None:
tokenizer.currentToken = {"type": "startTag",
"name":self._lastStartTag}
types = dict((v,k) for k,v in constants.tokenTypes.iteritems())
for token in tokenizer:
getattr(self, 'process%s' % types[token["type"]])(token)
return self.outputTokens
def processDoctype(self, token):
self.outputTokens.append([u"DOCTYPE", token["name"], token["publicId"],
token["systemId"], token["correct"]])
def processStartTag(self, token):
self.outputTokens.append([u"StartTag", token["name"],
dict(token["data"][::-1]), token["selfClosing"]])
def processEmptyTag(self, token):
if token["name"] not in constants.voidElements:
self.outputTokens.append(u"ParseError")
self.outputTokens.append([u"StartTag", token["name"], dict(token["data"][::-1])])
def processEndTag(self, token):
self.outputTokens.append([u"EndTag", token["name"],
token["selfClosing"]])
def processComment(self, token):
self.outputTokens.append([u"Comment", token["data"]])
def processSpaceCharacters(self, token):
self.outputTokens.append([u"Character", token["data"]])
self.processSpaceCharacters = self.processCharacters
def processCharacters(self, token):
self.outputTokens.append([u"Character", token["data"]])
def processEOF(self, token):
pass
def processParseError(self, token):
self.outputTokens.append([u"ParseError", token["data"]])
def concatenateCharacterTokens(tokens):
outputTokens = []
for token in tokens:
if not "ParseError" in token and token[0] == "Character":
if (outputTokens and not "ParseError" in outputTokens[-1] and
outputTokens[-1][0] == "Character"):
outputTokens[-1][1] += token[1]
else:
outputTokens.append(token)
else:
outputTokens.append(token)
return outputTokens
def normalizeTokens(tokens):
# TODO: convert tests to reflect arrays
for i, token in enumerate(tokens):
if token[0] == u'ParseError':
tokens[i] = token[0]
return tokens
def tokensMatch(expectedTokens, receivedTokens, ignoreErrorOrder,
ignoreErrors=False):
"""Test whether the test has passed or failed
If the ignoreErrorOrder flag is set to true we don't test the relative
positions of parse errors and non parse errors
"""
checkSelfClosing= False
for token in expectedTokens:
if (token[0] == "StartTag" and len(token) == 4
or token[0] == "EndTag" and len(token) == 3):
checkSelfClosing = True
break
if not checkSelfClosing:
for token in receivedTokens:
if token[0] == "StartTag" or token[0] == "EndTag":
token.pop()
if not ignoreErrorOrder and not ignoreErrors:
return expectedTokens == receivedTokens
else:
#Sort the tokens into two groups; non-parse errors and parse errors
tokens = {"expected":[[],[]], "received":[[],[]]}
for tokenType, tokenList in zip(tokens.keys(),
(expectedTokens, receivedTokens)):
for token in tokenList:
if token != "ParseError":
tokens[tokenType][0].append(token)
else:
if not ignoreErrors:
tokens[tokenType][1].append(token)
return tokens["expected"] == tokens["received"]
def unescape_test(test):
def decode(inp):
return inp.decode("unicode-escape")
test["input"] = decode(test["input"])
for token in test["output"]:
if token == "ParseError":
continue
else:
token[1] = decode(token[1])
if len(token) > 2:
for key, value in token[2]:
del token[2][key]
token[2][decode(key)] = decode(value)
return test
def runTokenizerTest(test):
#XXX - move this out into the setup function
#concatenate all consecutive character tokens into a single token
if 'doubleEscaped' in test:
test = unescape_test(test)
expected = concatenateCharacterTokens(test['output'])
if 'lastStartTag' not in test:
test['lastStartTag'] = None
outBuffer = cStringIO.StringIO()
stdout = sys.stdout
sys.stdout = outBuffer
parser = TokenizerTestParser(test['initialState'],
test['lastStartTag'])
tokens = parser.parse(test['input'])
tokens = concatenateCharacterTokens(tokens)
received = normalizeTokens(tokens)
errorMsg = u"\n".join(["\n\nInitial state:",
test['initialState'] ,
"\nInput:", unicode(test['input']),
"\nExpected:", unicode(expected),
"\nreceived:", unicode(tokens)])
errorMsg = errorMsg.encode("utf-8")
ignoreErrorOrder = test.get('ignoreErrorOrder', False)
assert tokensMatch(expected, received, ignoreErrorOrder), errorMsg
def _doCapitalize(match):
return match.group(1).upper()
_capitalizeRe = re.compile(r"\W+(\w)").sub
def capitalize(s):
s = s.lower()
s = _capitalizeRe(_doCapitalize, s)
return s
def test_tokenizer():
for filename in html5lib_test_files('tokenizer', '*.test'):
tests = json.load(file(filename))
testName = os.path.basename(filename).replace(".test","")
if 'tests' in tests:
for index,test in enumerate(tests['tests']):
#Skip tests with a self closing flag
skip = False
if 'initialStates' not in test:
test["initialStates"] = ["Data state"]
for initialState in test["initialStates"]:
test["initialState"] = capitalize(initialState)
yield runTokenizerTest, test

View File

@@ -1,311 +0,0 @@
import os
import sys
import StringIO
import unittest
import warnings
warnings.simplefilter("error")
from support import html5lib_test_files, TestData, convertExpected
from html5lib import html5parser, treewalkers, treebuilders, constants
from html5lib.filters.lint import Filter as LintFilter, LintError
def PullDOMAdapter(node):
from xml.dom import Node
from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, COMMENT, CHARACTERS
if node.nodeType in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE):
for childNode in node.childNodes:
for event in PullDOMAdapter(childNode):
yield event
elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
raise NotImplementedError("DOCTYPE nodes are not supported by PullDOM")
elif node.nodeType == Node.COMMENT_NODE:
yield COMMENT, node
elif node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
yield CHARACTERS, node
elif node.nodeType == Node.ELEMENT_NODE:
yield START_ELEMENT, node
for childNode in node.childNodes:
for event in PullDOMAdapter(childNode):
yield event
yield END_ELEMENT, node
else:
raise NotImplementedError("Node type not supported: " + str(node.nodeType))
treeTypes = {
"simpletree": {"builder": treebuilders.getTreeBuilder("simpletree"),
"walker": treewalkers.getTreeWalker("simpletree")},
"DOM": {"builder": treebuilders.getTreeBuilder("dom"),
"walker": treewalkers.getTreeWalker("dom")},
"PullDOM": {"builder": treebuilders.getTreeBuilder("dom"),
"adapter": PullDOMAdapter,
"walker": treewalkers.getTreeWalker("pulldom")},
}
#Try whatever etree implementations are available from a list that are
#"supposed" to work
try:
import xml.etree.ElementTree as ElementTree
treeTypes['ElementTree'] = \
{"builder": treebuilders.getTreeBuilder("etree", ElementTree),
"walker": treewalkers.getTreeWalker("etree", ElementTree)}
except ImportError:
try:
import elementtree.ElementTree as ElementTree
treeTypes['ElementTree'] = \
{"builder": treebuilders.getTreeBuilder("etree", ElementTree),
"walker": treewalkers.getTreeWalker("etree", ElementTree)}
except ImportError:
pass
try:
import xml.etree.cElementTree as ElementTree
treeTypes['cElementTree'] = \
{"builder": treebuilders.getTreeBuilder("etree", ElementTree),
"walker": treewalkers.getTreeWalker("etree", ElementTree)}
except ImportError:
try:
import cElementTree as ElementTree
treeTypes['cElementTree'] = \
{"builder": treebuilders.getTreeBuilder("etree", ElementTree),
"walker": treewalkers.getTreeWalker("etree", ElementTree)}
except ImportError:
pass
try:
import lxml.etree as ElementTree
# treeTypes['lxml_as_etree'] = \
# {"builder": treebuilders.getTreeBuilder("etree", ElementTree),
# "walker": treewalkers.getTreeWalker("etree", ElementTree)}
treeTypes['lxml_native'] = \
{"builder": treebuilders.getTreeBuilder("lxml"),
"walker": treewalkers.getTreeWalker("lxml")}
except ImportError:
pass
try:
import BeautifulSoup
treeTypes["beautifulsoup"] = \
{"builder": treebuilders.getTreeBuilder("beautifulsoup"),
"walker": treewalkers.getTreeWalker("beautifulsoup")}
except ImportError:
pass
#Try whatever etree implementations are available from a list that are
#"supposed" to work
try:
import pxdom
treeTypes['pxdom'] = \
{"builder": treebuilders.getTreeBuilder("dom", pxdom),
"walker": treewalkers.getTreeWalker("dom")}
except ImportError:
pass
try:
from genshi.core import QName, Attrs
from genshi.core import START, END, TEXT, COMMENT, DOCTYPE
def GenshiAdapter(tree):
text = None
for token in treewalkers.getTreeWalker("simpletree")(tree):
type = token["type"]
if type in ("Characters", "SpaceCharacters"):
if text is None:
text = token["data"]
else:
text += token["data"]
elif text is not None:
yield TEXT, text, (None, -1, -1)
text = None
if type in ("StartTag", "EmptyTag"):
if token["namespace"]:
name = u"{%s}%s" % (token["namespace"], token["name"])
else:
name = token["name"]
yield (START,
(QName(name),
Attrs([(QName(attr),value) for attr,value in token["data"]])),
(None, -1, -1))
if type == "EmptyTag":
type = "EndTag"
if type == "EndTag":
yield END, QName(token["name"]), (None, -1, -1)
elif type == "Comment":
yield COMMENT, token["data"], (None, -1, -1)
elif type == "Doctype":
yield DOCTYPE, (token["name"], token["publicId"],
token["systemId"]), (None, -1, -1)
else:
pass # FIXME: What to do?
if text is not None:
yield TEXT, text, (None, -1, -1)
#treeTypes["genshi"] = \
# {"builder": treebuilders.getTreeBuilder("simpletree"),
# "adapter": GenshiAdapter,
# "walker": treewalkers.getTreeWalker("genshi")}
except ImportError:
pass
def concatenateCharacterTokens(tokens):
charactersToken = None
for token in tokens:
type = token["type"]
if type in ("Characters", "SpaceCharacters"):
if charactersToken is None:
charactersToken = {"type": "Characters", "data": token["data"]}
else:
charactersToken["data"] += token["data"]
else:
if charactersToken is not None:
yield charactersToken
charactersToken = None
yield token
if charactersToken is not None:
yield charactersToken
def convertTokens(tokens):
output = []
indent = 0
for token in concatenateCharacterTokens(tokens):
type = token["type"]
if type in ("StartTag", "EmptyTag"):
if (token["namespace"] and
token["namespace"] != constants.namespaces["html"]):
if token["namespace"] in constants.prefixes:
name = constants.prefixes[token["namespace"]]
else:
name = token["namespace"]
name += u" " + token["name"]
else:
name = token["name"]
output.append(u"%s<%s>" % (" "*indent, name))
indent += 2
attrs = token["data"]
if attrs:
#TODO: Remove this if statement, attrs should always exist
for (namespace,name),value in sorted(attrs.items()):
if namespace:
if namespace in constants.prefixes:
outputname = constants.prefixes[namespace]
else:
outputname = namespace
outputname += u" " + name
else:
outputname = name
output.append(u"%s%s=\"%s\"" % (" "*indent, outputname, value))
if type == "EmptyTag":
indent -= 2
elif type == "EndTag":
indent -= 2
elif type == "Comment":
output.append("%s<!-- %s -->" % (" "*indent, token["data"]))
elif type == "Doctype":
if token["name"]:
if token["publicId"]:
output.append("""%s<!DOCTYPE %s "%s" "%s">"""%
(" "*indent, token["name"],
token["publicId"],
token["systemId"] and token["systemId"] or ""))
elif token["systemId"]:
output.append("""%s<!DOCTYPE %s "" "%s">"""%
(" "*indent, token["name"],
token["systemId"]))
else:
output.append("%s<!DOCTYPE %s>"%(" "*indent,
token["name"]))
else:
output.append("%s<!DOCTYPE >" % (" "*indent,))
elif type in ("Characters", "SpaceCharacters"):
output.append("%s\"%s\"" % (" "*indent, token["data"]))
else:
pass # TODO: what to do with errors?
return u"\n".join(output)
import re
attrlist = re.compile(r"^(\s+)\w+=.*(\n\1\w+=.*)+",re.M)
def sortattrs(x):
lines = x.group(0).split("\n")
lines.sort()
return "\n".join(lines)
class TokenTestCase(unittest.TestCase):
def test_all_tokens(self):
expected = [
{'data': {}, 'type': 'StartTag', 'namespace': u'http://www.w3.org/1999/xhtml', 'name': u'html'},
{'data': {}, 'type': 'StartTag', 'namespace': u'http://www.w3.org/1999/xhtml', 'name': u'head'},
{'data': {}, 'type': 'EndTag', 'namespace': u'http://www.w3.org/1999/xhtml', 'name': u'head'},
{'data': {}, 'type': 'StartTag', 'namespace': u'http://www.w3.org/1999/xhtml', 'name': u'body'},
{'data': u'a', 'type': 'Characters'},
{'data': {}, 'type': 'StartTag', 'namespace': u'http://www.w3.org/1999/xhtml', 'name': u'div'},
{'data': u'b', 'type': 'Characters'},
{'data': {}, 'type': 'EndTag', 'namespace': u'http://www.w3.org/1999/xhtml', 'name': u'div'},
{'data': u'c', 'type': 'Characters'},
{'data': {}, 'type': 'EndTag', 'namespace': u'http://www.w3.org/1999/xhtml', 'name': u'body'},
{'data': {}, 'type': 'EndTag', 'namespace': u'http://www.w3.org/1999/xhtml', 'name': u'html'}
]
for treeName, treeCls in treeTypes.iteritems():
p = html5parser.HTMLParser(tree = treeCls["builder"])
document = p.parse("<html><head></head><body>a<div>b</div>c</body></html>")
document = treeCls.get("adapter", lambda x: x)(document)
output = treeCls["walker"](document)
for expectedToken, outputToken in zip(expected, output):
self.assertEquals(expectedToken, outputToken)
def run_test(innerHTML, input, expected, errors, treeClass):
try:
p = html5parser.HTMLParser(tree = treeClass["builder"])
if innerHTML:
document = p.parseFragment(StringIO.StringIO(input), innerHTML)
else:
document = p.parse(StringIO.StringIO(input))
except constants.DataLossWarning:
#Ignore testcases we know we don't pass
return
document = treeClass.get("adapter", lambda x: x)(document)
try:
output = convertTokens(treeClass["walker"](document))
output = attrlist.sub(sortattrs, output)
expected = attrlist.sub(sortattrs, convertExpected(expected))
assert expected == output, "\n".join([
"", "Input:", input,
"", "Expected:", expected,
"", "Received:", output
])
except NotImplementedError:
pass # Amnesty for those that confess...
def test_treewalker():
sys.stdout.write('Testing tree walkers '+ " ".join(treeTypes.keys()) + "\n")
for treeName, treeCls in treeTypes.iteritems():
files = html5lib_test_files('tree-construction')
for filename in files:
testName = os.path.basename(filename).replace(".dat","")
tests = TestData(filename, "data")
for index, test in enumerate(tests):
(input, errors,
innerHTML, expected) = [test[key] for key in ("data", "errors",
"document-fragment",
"document")]
errors = errors.split("\n")
yield run_test, innerHTML, input, expected, errors, treeCls

View File

@@ -1,123 +0,0 @@
import unittest
from html5lib.filters.whitespace import Filter
from html5lib.constants import spaceCharacters
spaceCharacters = u"".join(spaceCharacters)
class TestCase(unittest.TestCase):
def runTest(self, input, expected):
output = list(Filter(input))
errorMsg = "\n".join(["\n\nInput:", str(input),
"\nExpected:", str(expected),
"\nReceived:", str(output)])
self.assertEquals(output, expected, errorMsg)
def runTestUnmodifiedOutput(self, input):
self.runTest(input, input)
def testPhrasingElements(self):
self.runTestUnmodifiedOutput(
[{"type": u"Characters", "data": u"This is a " },
{"type": u"StartTag", "name": u"span", "data": [] },
{"type": u"Characters", "data": u"phrase" },
{"type": u"EndTag", "name": u"span", "data": []},
{"type": u"SpaceCharacters", "data": u" " },
{"type": u"Characters", "data": u"with" },
{"type": u"SpaceCharacters", "data": u" " },
{"type": u"StartTag", "name": u"em", "data": [] },
{"type": u"Characters", "data": u"emphasised text" },
{"type": u"EndTag", "name": u"em", "data": []},
{"type": u"Characters", "data": u" and an " },
{"type": u"StartTag", "name": u"img", "data": [[u"alt", u"image"]] },
{"type": u"Characters", "data": u"." }])
def testLeadingWhitespace(self):
self.runTest(
[{"type": u"StartTag", "name": u"p", "data": []},
{"type": u"SpaceCharacters", "data": spaceCharacters},
{"type": u"Characters", "data": u"foo"},
{"type": u"EndTag", "name": u"p", "data": []}],
[{"type": u"StartTag", "name": u"p", "data": []},
{"type": u"SpaceCharacters", "data": u" "},
{"type": u"Characters", "data": u"foo"},
{"type": u"EndTag", "name": u"p", "data": []}])
def testLeadingWhitespaceAsCharacters(self):
self.runTest(
[{"type": u"StartTag", "name": u"p", "data": []},
{"type": u"Characters", "data": spaceCharacters + u"foo"},
{"type": u"EndTag", "name": u"p", "data": []}],
[{"type": u"StartTag", "name": u"p", "data": []},
{"type": u"Characters", "data": u" foo"},
{"type": u"EndTag", "name": u"p", "data": []}])
def testTrailingWhitespace(self):
self.runTest(
[{"type": u"StartTag", "name": u"p", "data": []},
{"type": u"Characters", "data": u"foo"},
{"type": u"SpaceCharacters", "data": spaceCharacters},
{"type": u"EndTag", "name": u"p", "data": []}],
[{"type": u"StartTag", "name": u"p", "data": []},
{"type": u"Characters", "data": u"foo"},
{"type": u"SpaceCharacters", "data": u" "},
{"type": u"EndTag", "name": u"p", "data": []}])
def testTrailingWhitespaceAsCharacters(self):
self.runTest(
[{"type": u"StartTag", "name": u"p", "data": []},
{"type": u"Characters", "data": u"foo" + spaceCharacters},
{"type": u"EndTag", "name": u"p", "data": []}],
[{"type": u"StartTag", "name": u"p", "data": []},
{"type": u"Characters", "data": u"foo "},
{"type": u"EndTag", "name": u"p", "data": []}])
def testWhitespace(self):
self.runTest(
[{"type": u"StartTag", "name": u"p", "data": []},
{"type": u"Characters", "data": u"foo" + spaceCharacters + "bar"},
{"type": u"EndTag", "name": u"p", "data": []}],
[{"type": u"StartTag", "name": u"p", "data": []},
{"type": u"Characters", "data": u"foo bar"},
{"type": u"EndTag", "name": u"p", "data": []}])
def testLeadingWhitespaceInPre(self):
self.runTestUnmodifiedOutput(
[{"type": u"StartTag", "name": u"pre", "data": []},
{"type": u"SpaceCharacters", "data": spaceCharacters},
{"type": u"Characters", "data": u"foo"},
{"type": u"EndTag", "name": u"pre", "data": []}])
def testLeadingWhitespaceAsCharactersInPre(self):
self.runTestUnmodifiedOutput(
[{"type": u"StartTag", "name": u"pre", "data": []},
{"type": u"Characters", "data": spaceCharacters + u"foo"},
{"type": u"EndTag", "name": u"pre", "data": []}])
def testTrailingWhitespaceInPre(self):
self.runTestUnmodifiedOutput(
[{"type": u"StartTag", "name": u"pre", "data": []},
{"type": u"Characters", "data": u"foo"},
{"type": u"SpaceCharacters", "data": spaceCharacters},
{"type": u"EndTag", "name": u"pre", "data": []}])
def testTrailingWhitespaceAsCharactersInPre(self):
self.runTestUnmodifiedOutput(
[{"type": u"StartTag", "name": u"pre", "data": []},
{"type": u"Characters", "data": u"foo" + spaceCharacters},
{"type": u"EndTag", "name": u"pre", "data": []}])
def testWhitespaceInPre(self):
self.runTestUnmodifiedOutput(
[{"type": u"StartTag", "name": u"pre", "data": []},
{"type": u"Characters", "data": u"foo" + spaceCharacters + "bar"},
{"type": u"EndTag", "name": u"pre", "data": []}])
def buildTestSuite():
return unittest.defaultTestLoader.loadTestsFromName(__name__)
def main():
buildTestSuite()
unittest.main()
if __name__ == "__main__":
main()

View File

@@ -1,10 +0,0 @@
#data
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=euc-jp">
<!--京-->
<title>Yahoo! JAPAN</title>
<meta name="description" content="日本最大級のポータルサイト。検索、オークション、ニュース、メール、コミュニティ、ショッピング、など80以上のサービスを展開。あなたの生活をより豊かにする「ライフ・エンジン」を目指していきます。">
<style type="text/css" media="all">
#encoding
euc_jp

File diff suppressed because one or more lines are too long

View File

@@ -1,115 +0,0 @@
#data
<meta
#encoding
windows-1252
#data
<
#encoding
windows-1252
#data
<!
#encoding
windows-1252
#data
<meta charset = "
#encoding
windows-1252
#data
<meta charset=euc_jp
#encoding
windows-1252
#data
<meta <meta charset='euc_jp'>
#encoding
euc_jp
#data
<meta charset = 'euc_jp'>
#encoding
euc_jp
#data
<!-- -->
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
#encoding
utf-8
#data
<!-- -->
<meta http-equiv="Content-Type" content="text/html; charset=utf
#encoding
windows-1252
#data
<meta http-equiv="Content-Type<meta charset="utf-8">
#encoding
windows-1252
#data
<meta http-equiv="Content-Type" content="text/html; charset='utf-8'">
#encoding
utf-8
#data
<meta http-equiv="Content-Type" content="text/html; charset='utf-8">
#encoding
windows-1252
#data
<meta
#encoding
windows-1252
#data
<meta charset =
#encoding
windows-1252
#data
<meta charset= utf-8
>
#encoding
utf-8
#data
<meta content = "text/html;
#encoding
windows-1252
#data
<meta charset="UTF-16">
#encoding
utf-8
#data
<meta charset="UTF-16LE">
#encoding
utf-8
#data
<meta charset="UTF-16BE">
#encoding
utf-8
#data
<html a=ñ>
<meta charset="utf-8">
#encoding
utf-8
#data
<html ñ>
<meta charset="utf-8">
#encoding
utf-8
#data
<html>ñ
<meta charset="utf-8">
#encoding
utf-8

View File

@@ -1,501 +0,0 @@
[
{
"name": "IE_Comments",
"input": "<!--[if gte IE 4]><script>alert('XSS');</script><![endif]-->",
"output": ""
},
{
"name": "IE_Comments_2",
"input": "<![if !IE 5]><script>alert('XSS');</script><![endif]>",
"output": "&lt;script&gt;alert('XSS');&lt;/script&gt;",
"rexml": "Ill-formed XHTML!"
},
{
"name": "allow_colons_in_path_component",
"input": "<a href=\"./this:that\">foo</a>",
"output": "<a href='./this:that'>foo</a>"
},
{
"name": "background_attribute",
"input": "<div background=\"javascript:alert('XSS')\"></div>",
"output": "<div/>",
"xhtml": "<div></div>",
"rexml": "<div></div>"
},
{
"name": "bgsound",
"input": "<bgsound src=\"javascript:alert('XSS');\" />",
"output": "&lt;bgsound src=\"javascript:alert('XSS');\"/&gt;",
"rexml": "&lt;bgsound src=\"javascript:alert('XSS');\"&gt;&lt;/bgsound&gt;"
},
{
"name": "div_background_image_unicode_encoded",
"input": "<div style=\"background-image:\u00a5\u00a2\u006C\u0028'\u006a\u0061\u00a6\u0061\u00a3\u0063\u00a2\u0069\u00a0\u00a4\u003a\u0061\u006c\u0065\u00a2\u00a4\u0028.1027\u0058.1053\u0053\u0027\u0029'\u0029\">foo</div>",
"output": "<div style=''>foo</div>"
},
{
"name": "div_expression",
"input": "<div style=\"width: expression(alert('XSS'));\">foo</div>",
"output": "<div style=''>foo</div>"
},
{
"name": "double_open_angle_brackets",
"input": "<img src=http://ha.ckers.org/scriptlet.html <",
"output": "<img src='http://ha.ckers.org/scriptlet.html'>",
"rexml": "Ill-formed XHTML!"
},
{
"name": "double_open_angle_brackets_2",
"input": "<script src=http://ha.ckers.org/scriptlet.html <",
"output": "&lt;script src=\"http://ha.ckers.org/scriptlet.html\" &lt;=\"\"&gt;",
"rexml": "Ill-formed XHTML!"
},
{
"name": "grave_accents",
"input": "<img src=`javascript:alert('XSS')` />",
"output": "<img/>",
"rexml": "Ill-formed XHTML!"
},
{
"name": "img_dynsrc_lowsrc",
"input": "<img dynsrc=\"javascript:alert('XSS')\" />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "img_vbscript",
"input": "<img src='vbscript:msgbox(\"XSS\")' />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "input_image",
"input": "<input type=\"image\" src=\"javascript:alert('XSS');\" />",
"output": "<input type='image'/>",
"rexml": "<input type='image' />"
},
{
"name": "link_stylesheets",
"input": "<link rel=\"stylesheet\" href=\"javascript:alert('XSS');\" />",
"output": "&lt;link rel=\"stylesheet\" href=\"javascript:alert('XSS');\"/&gt;",
"rexml": "&lt;link href=\"javascript:alert('XSS');\" rel=\"stylesheet\"/&gt;"
},
{
"name": "link_stylesheets_2",
"input": "<link rel=\"stylesheet\" href=\"http://ha.ckers.org/xss.css\" />",
"output": "&lt;link rel=\"stylesheet\" href=\"http://ha.ckers.org/xss.css\"/&gt;",
"rexml": "&lt;link href=\"http://ha.ckers.org/xss.css\" rel=\"stylesheet\"/&gt;"
},
{
"name": "list_style_image",
"input": "<li style=\"list-style-image: url(javascript:alert('XSS'))\">foo</li>",
"output": "<li style=''>foo</li>"
},
{
"name": "no_closing_script_tags",
"input": "<script src=http://ha.ckers.org/xss.js?<b>",
"output": "&lt;script src=\"http://ha.ckers.org/xss.js?&amp;lt;b\"&gt;",
"rexml": "Ill-formed XHTML!"
},
{
"name": "non_alpha_non_digit",
"input": "<script/XSS src=\"http://ha.ckers.org/xss.js\"></script>",
"output": "&lt;script XSS=\"\" src=\"http://ha.ckers.org/xss.js\"&gt;&lt;/script&gt;",
"rexml": "Ill-formed XHTML!"
},
{
"name": "non_alpha_non_digit_2",
"input": "<a onclick!\\#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>foo</a>",
"output": "<a>foo</a>",
"rexml": "Ill-formed XHTML!"
},
{
"name": "non_alpha_non_digit_3",
"input": "<img/src=\"http://ha.ckers.org/xss.js\"/>",
"output": "<img src='http://ha.ckers.org/xss.js'/>",
"rexml": "Ill-formed XHTML!"
},
{
"name": "non_alpha_non_digit_II",
"input": "<a href!\\#$%&()*~+-_.,:;?@[/|]^`=alert('XSS')>foo</a>",
"output": "<a>foo</a>",
"rexml": "Ill-formed XHTML!"
},
{
"name": "non_alpha_non_digit_III",
"input": "<a/href=\"javascript:alert('XSS');\">foo</a>",
"output": "<a>foo</a>",
"rexml": "Ill-formed XHTML!"
},
{
"name": "platypus",
"input": "<a href=\"http://www.ragingplatypus.com/\" style=\"display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;\">never trust your upstream platypus</a>",
"output": "<a href='http://www.ragingplatypus.com/' style='display: block; width: 100%; height: 100%; background-color: black; background-x: center; background-y: center;'>never trust your upstream platypus</a>"
},
{
"name": "protocol_resolution_in_script_tag",
"input": "<script src=//ha.ckers.org/.j></script>",
"output": "&lt;script src=\"//ha.ckers.org/.j\"&gt;&lt;/script&gt;",
"rexml": "Ill-formed XHTML!"
},
{
"name": "should_allow_anchors",
"input": "<a href='foo' onclick='bar'><script>baz</script></a>",
"output": "<a href='foo'>&lt;script&gt;baz&lt;/script&gt;</a>"
},
{
"name": "should_allow_image_alt_attribute",
"input": "<img alt='foo' onclick='bar' />",
"output": "<img alt='foo'/>",
"rexml": "<img alt='foo' />"
},
{
"name": "should_allow_image_height_attribute",
"input": "<img height='foo' onclick='bar' />",
"output": "<img height='foo'/>",
"rexml": "<img height='foo' />"
},
{
"name": "should_allow_image_src_attribute",
"input": "<img src='foo' onclick='bar' />",
"output": "<img src='foo'/>",
"rexml": "<img src='foo' />"
},
{
"name": "should_allow_image_width_attribute",
"input": "<img width='foo' onclick='bar' />",
"output": "<img width='foo'/>",
"rexml": "<img width='foo' />"
},
{
"name": "should_handle_blank_text",
"input": "",
"output": ""
},
{
"name": "should_handle_malformed_image_tags",
"input": "<img \"\"\"><script>alert(\"XSS\")</script>\">",
"output": "<img/>&lt;script&gt;alert(\"XSS\")&lt;/script&gt;\"&gt;",
"rexml": "Ill-formed XHTML!"
},
{
"name": "should_handle_non_html",
"input": "abc",
"output": "abc"
},
{
"name": "should_not_fall_for_ridiculous_hack",
"input": "<img\nsrc\n=\n\"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n\"\n />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_0",
"input": "<img src=\"javascript:alert('XSS');\" />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_1",
"input": "<img src=javascript:alert('XSS') />",
"output": "<img/>",
"rexml": "Ill-formed XHTML!"
},
{
"name": "should_not_fall_for_xss_image_hack_10",
"input": "<img src=\"jav&#x0A;ascript:alert('XSS');\" />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_11",
"input": "<img src=\"jav&#x0D;ascript:alert('XSS');\" />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_12",
"input": "<img src=\" &#14; javascript:alert('XSS');\" />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_13",
"input": "<img src=\"&#x20;javascript:alert('XSS');\" />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_14",
"input": "<img src=\"&#xA0;javascript:alert('XSS');\" />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_2",
"input": "<img src=\"JaVaScRiPt:alert('XSS')\" />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_3",
"input": "<img src='javascript:alert(&quot;XSS&quot;)' />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_4",
"input": "<img src='javascript:alert(String.fromCharCode(88,83,83))' />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_5",
"input": "<img src='&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;' />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_6",
"input": "<img src='&#0000106;&#0000097;&#0000118;&#0000097;&#0000115;&#0000099;&#0000114;&#0000105;&#0000112;&#0000116;&#0000058;&#0000097;&#0000108;&#0000101;&#0000114;&#0000116;&#0000040;&#0000039;&#0000088;&#0000083;&#0000083;&#0000039;&#0000041' />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_7",
"input": "<img src='&#x6A;&#x61;&#x76;&#x61;&#x73;&#x63;&#x72;&#x69;&#x70;&#x74;&#x3A;&#x61;&#x6C;&#x65;&#x72;&#x74;&#x28;&#x27;&#x58;&#x53;&#x53;&#x27;&#x29' />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_8",
"input": "<img src=\"jav\tascript:alert('XSS');\" />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_9",
"input": "<img src=\"jav&#x09;ascript:alert('XSS');\" />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_sanitize_half_open_scripts",
"input": "<img src=\"javascript:alert('XSS')\"",
"output": "<img/>",
"rexml": "Ill-formed XHTML!"
},
{
"name": "should_sanitize_invalid_script_tag",
"input": "<script/XSS SRC=\"http://ha.ckers.org/xss.js\"></script>",
"output": "&lt;script XSS=\"\" SRC=\"http://ha.ckers.org/xss.js\"&gt;&lt;/script&gt;",
"rexml": "Ill-formed XHTML!"
},
{
"name": "should_sanitize_script_tag_with_multiple_open_brackets",
"input": "<<script>alert(\"XSS\");//<</script>",
"output": "&lt;&lt;script&gt;alert(\"XSS\");//&lt;&lt;/script&gt;",
"rexml": "Ill-formed XHTML!"
},
{
"name": "should_sanitize_script_tag_with_multiple_open_brackets_2",
"input": "<iframe src=http://ha.ckers.org/scriptlet.html\n<",
"output": "&lt;iframe src=\"http://ha.ckers.org/scriptlet.html\" &lt;=\"\"&gt;",
"rexml": "Ill-formed XHTML!"
},
{
"name": "should_sanitize_tag_broken_up_by_null",
"input": "<scr\u0000ipt>alert(\"XSS\")</scr\u0000ipt>",
"output": "&lt;scr\ufffdipt&gt;alert(\"XSS\")&lt;/scr\ufffdipt&gt;",
"rexml": "Ill-formed XHTML!"
},
{
"name": "should_sanitize_unclosed_script",
"input": "<script src=http://ha.ckers.org/xss.js?<b>",
"output": "&lt;script src=\"http://ha.ckers.org/xss.js?&amp;lt;b\"&gt;",
"rexml": "Ill-formed XHTML!"
},
{
"name": "should_strip_href_attribute_in_a_with_bad_protocols",
"input": "<a href=\"javascript:XSS\" title=\"1\">boo</a>",
"output": "<a title='1'>boo</a>"
},
{
"name": "should_strip_href_attribute_in_a_with_bad_protocols_and_whitespace",
"input": "<a href=\" javascript:XSS\" title=\"1\">boo</a>",
"output": "<a title='1'>boo</a>"
},
{
"name": "should_strip_src_attribute_in_img_with_bad_protocols",
"input": "<img src=\"javascript:XSS\" title=\"1\">boo</img>",
"output": "<img title='1'/>boo",
"rexml": "<img title='1' />"
},
{
"name": "should_strip_src_attribute_in_img_with_bad_protocols_and_whitespace",
"input": "<img src=\" javascript:XSS\" title=\"1\">boo</img>",
"output": "<img title='1'/>boo",
"rexml": "<img title='1' />"
},
{
"name": "xml_base",
"input": "<div xml:base=\"javascript:alert('XSS');//\">foo</div>",
"output": "<div>foo</div>"
},
{
"name": "xul",
"input": "<p style=\"-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss')\">fubar</p>",
"output": "<p style=''>fubar</p>"
},
{
"name": "quotes_in_attributes",
"input": "<img src='foo' title='\"foo\" bar' />",
"rexml": "<img src='foo' title='\"foo\" bar' />",
"output": "<img title='&quot;foo&quot; bar' src='foo'/>"
},
{
"name": "uri_refs_in_svg_attributes",
"input": "<rect fill='url(#foo)' />",
"rexml": "<rect fill='url(#foo)'></rect>",
"xhtml": "<rect fill='url(#foo)'></rect>",
"output": "<rect fill='url(#foo)'/>"
},
{
"name": "absolute_uri_refs_in_svg_attributes",
"input": "<rect fill='url(http://bad.com/) #fff' />",
"rexml": "<rect fill=' #fff'></rect>",
"xhtml": "<rect fill=' #fff'></rect>",
"output": "<rect fill=' #fff'/>"
},
{
"name": "uri_ref_with_space_in svg_attribute",
"input": "<rect fill='url(\n#foo)' />",
"rexml": "<rect fill='url(\n#foo)'></rect>",
"xhtml": "<rect fill='url(\n#foo)'></rect>",
"output": "<rect fill='url(\n#foo)'/>"
},
{
"name": "absolute_uri_ref_with_space_in svg_attribute",
"input": "<rect fill=\"url(\nhttp://bad.com/)\" />",
"rexml": "<rect fill=' '></rect>",
"xhtml": "<rect fill=' '></rect>",
"output": "<rect fill=' '/>"
},
{
"name": "allow_html5_image_tag",
"input": "<image src='foo' />",
"rexml": "&lt;image src=\"foo\"&gt;&lt;/image&gt;",
"output": "&lt;image src=\"foo\"/&gt;"
},
{
"name": "style_attr_end_with_nothing",
"input": "<div style=\"color: blue\" />",
"output": "<div style='color: blue;'/>",
"xhtml": "<div style='color: blue;'></div>",
"rexml": "<div style='color: blue;'></div>"
},
{
"name": "style_attr_end_with_space",
"input": "<div style=\"color: blue \" />",
"output": "<div style='color: blue ;'/>",
"xhtml": "<div style='color: blue ;'></div>",
"rexml": "<div style='color: blue ;'></div>"
},
{
"name": "style_attr_end_with_semicolon",
"input": "<div style=\"color: blue;\" />",
"output": "<div style='color: blue;'/>",
"xhtml": "<div style='color: blue;'></div>",
"rexml": "<div style='color: blue;'></div>"
},
{
"name": "style_attr_end_with_semicolon_space",
"input": "<div style=\"color: blue; \" />",
"output": "<div style='color: blue;'/>",
"xhtml": "<div style='color: blue;'></div>",
"rexml": "<div style='color: blue;'></div>"
},
{
"name": "attributes_with_embedded_quotes",
"input": "<img src=doesntexist.jpg\"'onerror=\"alert(1) />",
"output": "<img src='doesntexist.jpg&quot;&apos;onerror=&quot;alert(1)'/>",
"rexml": "Ill-formed XHTML!"
},
{
"name": "attributes_with_embedded_quotes_II",
"input": "<img src=notthere.jpg\"\"onerror=\"alert(2) />",
"output": "<img src='notthere.jpg&quot;&quot;onerror=&quot;alert(2)'/>",
"rexml": "Ill-formed XHTML!"
}
]

View File

@@ -1,125 +0,0 @@
{"tests": [
{"description": "proper attribute value escaping",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "test \"with\" &quot;"}]]],
"expected": ["<span title='test \"with\" &amp;quot;'>"]
},
{"description": "proper attribute value non-quoting",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo"}]]],
"expected": ["<span title=foo>"],
"xhtml": ["<span title=\"foo\">"]
},
{"description": "proper attribute value non-quoting (with <)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo<bar"}]]],
"expected": ["<span title=foo<bar>"],
"xhtml": ["<span title=\"foo&lt;bar\">"]
},
{"description": "proper attribute value quoting (with =)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo=bar"}]]],
"expected": ["<span title=\"foo=bar\">"]
},
{"description": "proper attribute value quoting (with >)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo>bar"}]]],
"expected": ["<span title=\"foo>bar\">"]
},
{"description": "proper attribute value quoting (with \")",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\"bar"}]]],
"expected": ["<span title='foo\"bar'>"]
},
{"description": "proper attribute value quoting (with ')",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo'bar"}]]],
"expected": ["<span title=\"foo'bar\">"]
},
{"description": "proper attribute value quoting (with both \" and ')",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo'bar\"baz"}]]],
"expected": ["<span title=\"foo'bar&quot;baz\">"]
},
{"description": "proper attribute value quoting (with space)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo bar"}]]],
"expected": ["<span title=\"foo bar\">"]
},
{"description": "proper attribute value quoting (with tab)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\tbar"}]]],
"expected": ["<span title=\"foo\tbar\">"]
},
{"description": "proper attribute value quoting (with LF)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\nbar"}]]],
"expected": ["<span title=\"foo\nbar\">"]
},
{"description": "proper attribute value quoting (with CR)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\rbar"}]]],
"expected": ["<span title=\"foo\rbar\">"]
},
{"description": "proper attribute value non-quoting (with linetab)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\u000Bbar"}]]],
"expected": ["<span title=foo\u000Bbar>"],
"xhtml": ["<span title=\"foo\u000Bbar\">"]
},
{"description": "proper attribute value quoting (with form feed)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\u000Cbar"}]]],
"expected": ["<span title=\"foo\u000Cbar\">"]
},
{"description": "void element (as EmptyTag token)",
"input": [["EmptyTag", "img", {}]],
"expected": ["<img>"],
"xhtml": ["<img />"]
},
{"description": "void element (as StartTag token)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "img", {}]],
"expected": ["<img>"],
"xhtml": ["<img />"]
},
{"description": "doctype in error",
"input": [["Doctype", "foo"]],
"expected": ["<!DOCTYPE foo>"]
},
{"description": "character data",
"options": {"encoding":"utf-8"},
"input": [["Characters", "a<b>c&d"]],
"expected": ["a&lt;b&gt;c&amp;d"]
},
{"description": "rcdata",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "a<b>c&d"]],
"expected": ["<script>a<b>c&d"],
"xhtml": ["<script>a&lt;b&gt;c&amp;d"]
},
{"description": "doctype",
"input": [["Doctype", "HTML"]],
"expected": ["<!DOCTYPE HTML>"]
},
{"description": "HTML 4.01 DOCTYPE",
"input": [["Doctype", "HTML", "-//W3C//DTD HTML 4.01//EN", "http://www.w3.org/TR/html4/strict.dtd"]],
"expected": ["<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">"]
},
{"description": "HTML 4.01 DOCTYPE without system identifer",
"input": [["Doctype", "HTML", "-//W3C//DTD HTML 4.01//EN"]],
"expected": ["<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\">"]
},
{"description": "IBM DOCTYPE without public identifer",
"input": [["Doctype", "html", "", "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"]],
"expected": ["<!DOCTYPE html SYSTEM \"http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd\">"]
}
]}

View File

@@ -1,66 +0,0 @@
{"tests": [
{"description": "no encoding",
"options": {"inject_meta_charset": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": [""],
"xhtml": ["<head></head>"]
},
{"description": "empytag head",
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": ["<meta charset=utf-8>"],
"xhtml": ["<head><meta charset=\"utf-8\" /></head>"]
},
{"description": "head w/title",
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["StartTag", "http://www.w3.org/1999/xhtml","title",{}], ["Characters", "foo"],["EndTag", "http://www.w3.org/1999/xhtml", "title"], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": ["<meta charset=utf-8><title>foo</title>"],
"xhtml": ["<head><meta charset=\"utf-8\" /><title>foo</title></head>"]
},
{"description": "head w/meta-charset",
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": ["<meta charset=utf-8>"],
"xhtml": ["<head><meta charset=\"utf-8\" /></head>"]
},
{"description": "head w/ two meta-charset",
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": ["<meta charset=utf-8><meta charset=utf-8>", "<head><meta charset=utf-8><meta charset=ascii>"],
"xhtml": ["<head><meta charset=\"utf-8\" /><meta charset=\"utf-8\" /></head>", "<head><meta charset=\"utf-8\" /><meta charset=\"ascii\" /></head>"]
},
{"description": "head w/robots",
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "name", "value": "robots"},{"namespace": null, "name": "content", "value": "noindex"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": ["<meta charset=utf-8><meta content=noindex name=robots>"],
"xhtml": ["<head><meta charset=\"utf-8\" /><meta content=\"noindex\" name=\"robots\" /></head>"]
},
{"description": "head w/robots & charset",
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "name", "value": "robots"},{"namespace": null, "name": "content", "value": "noindex"}]], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": ["<meta content=noindex name=robots><meta charset=utf-8>"],
"xhtml": ["<head><meta content=\"noindex\" name=\"robots\" /><meta charset=\"utf-8\" /></head>"]
},
{"description": "head w/ charset in http-equiv content-type",
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "http-equiv", "value": "content-type"}, {"namespace": null, "name": "content", "value": "text/html; charset=ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": ["<meta content=\"text/html; charset=utf-8\" http-equiv=content-type>"],
"xhtml": ["<head><meta content=\"text/html; charset=utf-8\" http-equiv=\"content-type\" /></head>"]
},
{"description": "head w/robots & charset in http-equiv content-type",
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "name", "value": "robots"},{"namespace": null, "name": "content", "value": "noindex"}]], ["EmptyTag","meta",[{"namespace": null, "name": "http-equiv", "value": "content-type"}, {"namespace": null, "name": "content", "value": "text/html; charset=ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": ["<meta content=noindex name=robots><meta content=\"text/html; charset=utf-8\" http-equiv=content-type>"],
"xhtml": ["<head><meta content=\"noindex\" name=\"robots\" /><meta content=\"text/html; charset=utf-8\" http-equiv=\"content-type\" /></head>"]
}
]}

View File

@@ -1,965 +0,0 @@
{"tests": [
{"description": "html start-tag followed by text, with attributes",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", [{"namespace": null, "name": "lang", "value": "en"}]], ["Characters", "foo"]],
"expected": ["<html lang=en>foo"]
},
{"description": "html start-tag followed by comment",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["Comment", "foo"]],
"expected": ["<html><!--foo-->"]
},
{"description": "html start-tag followed by space character",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["Characters", " foo"]],
"expected": ["<html> foo"]
},
{"description": "html start-tag followed by text",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["Characters", "foo"]],
"expected": ["foo"]
},
{"description": "html start-tag followed by start-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "html start-tag followed by end-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "html start-tag at EOF (shouldn't ever happen?!)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}]],
"expected": [""]
},
{"description": "html end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["Comment", "foo"]],
"expected": ["</html><!--foo-->"]
},
{"description": "html end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["Characters", " foo"]],
"expected": ["</html> foo"]
},
{"description": "html end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["Characters", "foo"]],
"expected": ["foo"]
},
{"description": "html end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "html end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "html end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"]],
"expected": [""]
},
{"description": "head start-tag followed by comment",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["Comment", "foo"]],
"expected": ["<head><!--foo-->"]
},
{"description": "head start-tag followed by space character",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["Characters", " foo"]],
"expected": ["<head> foo"]
},
{"description": "head start-tag followed by text",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["Characters", "foo"]],
"expected": ["<head>foo"]
},
{"description": "head start-tag followed by start-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "head start-tag followed by end-tag (shouldn't ever happen?!)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["<head></foo>", "</foo>"]
},
{"description": "empty head element",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": [""]
},
{"description": "head start-tag followed by empty-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "head start-tag at EOF (shouldn't ever happen?!)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}]],
"expected": ["<head>", ""]
},
{"description": "head end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["Comment", "foo"]],
"expected": ["</head><!--foo-->"]
},
{"description": "head end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["Characters", " foo"]],
"expected": ["</head> foo"]
},
{"description": "head end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["Characters", "foo"]],
"expected": ["foo"]
},
{"description": "head end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "head end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "head end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": [""]
},
{"description": "body start-tag followed by comment",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["Comment", "foo"]],
"expected": ["<body><!--foo-->"]
},
{"description": "body start-tag followed by space character",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["Characters", " foo"]],
"expected": ["<body> foo"]
},
{"description": "body start-tag followed by text",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["Characters", "foo"]],
"expected": ["foo"]
},
{"description": "body start-tag followed by start-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "body start-tag followed by end-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "body start-tag at EOF (shouldn't ever happen?!)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}]],
"expected": [""]
},
{"description": "body end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["Comment", "foo"]],
"expected": ["</body><!--foo-->"]
},
{"description": "body end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["Characters", " foo"]],
"expected": ["</body> foo"]
},
{"description": "body end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["Characters", "foo"]],
"expected": ["foo"]
},
{"description": "body end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "body end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "body end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"]],
"expected": [""]
},
{"description": "li end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["Comment", "foo"]],
"expected": ["</li><!--foo-->"]
},
{"description": "li end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["Characters", " foo"]],
"expected": ["</li> foo"]
},
{"description": "li end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["Characters", "foo"]],
"expected": ["</li>foo"]
},
{"description": "li end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</li><foo>"]
},
{"description": "li end-tag followed by li start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["StartTag", "http://www.w3.org/1999/xhtml", "li", {}]],
"expected": ["<li>"]
},
{"description": "li end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "li end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"]],
"expected": [""]
},
{"description": "dt end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["Comment", "foo"]],
"expected": ["</dt><!--foo-->"]
},
{"description": "dt end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["Characters", " foo"]],
"expected": ["</dt> foo"]
},
{"description": "dt end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["Characters", "foo"]],
"expected": ["</dt>foo"]
},
{"description": "dt end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</dt><foo>"]
},
{"description": "dt end-tag followed by dt start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["StartTag", "http://www.w3.org/1999/xhtml", "dt", {}]],
"expected": ["<dt>"]
},
{"description": "dt end-tag followed by dd start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["StartTag", "http://www.w3.org/1999/xhtml", "dd", {}]],
"expected": ["<dd>"]
},
{"description": "dt end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</dt></foo>"]
},
{"description": "dt end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"]],
"expected": ["</dt>"]
},
{"description": "dd end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["Comment", "foo"]],
"expected": ["</dd><!--foo-->"]
},
{"description": "dd end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["Characters", " foo"]],
"expected": ["</dd> foo"]
},
{"description": "dd end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["Characters", "foo"]],
"expected": ["</dd>foo"]
},
{"description": "dd end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</dd><foo>"]
},
{"description": "dd end-tag followed by dd start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["StartTag", "http://www.w3.org/1999/xhtml", "dd", {}]],
"expected": ["<dd>"]
},
{"description": "dd end-tag followed by dt start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["StartTag", "http://www.w3.org/1999/xhtml", "dt", {}]],
"expected": ["<dt>"]
},
{"description": "dd end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "dd end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"]],
"expected": [""]
},
{"description": "p end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["Comment", "foo"]],
"expected": ["</p><!--foo-->"]
},
{"description": "p end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["Characters", " foo"]],
"expected": ["</p> foo"]
},
{"description": "p end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["Characters", "foo"]],
"expected": ["</p>foo"]
},
{"description": "p end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</p><foo>"]
},
{"description": "p end-tag followed by address start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "address", {}]],
"expected": ["<address>"]
},
{"description": "p end-tag followed by article start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "article", {}]],
"expected": ["<article>"]
},
{"description": "p end-tag followed by aside start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "aside", {}]],
"expected": ["<aside>"]
},
{"description": "p end-tag followed by blockquote start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "blockquote", {}]],
"expected": ["<blockquote>"]
},
{"description": "p end-tag followed by datagrid start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "datagrid", {}]],
"expected": ["<datagrid>"]
},
{"description": "p end-tag followed by dialog start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "dialog", {}]],
"expected": ["<dialog>"]
},
{"description": "p end-tag followed by dir start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "dir", {}]],
"expected": ["<dir>"]
},
{"description": "p end-tag followed by div start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "div", {}]],
"expected": ["<div>"]
},
{"description": "p end-tag followed by dl start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "dl", {}]],
"expected": ["<dl>"]
},
{"description": "p end-tag followed by fieldset start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "fieldset", {}]],
"expected": ["<fieldset>"]
},
{"description": "p end-tag followed by footer start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "footer", {}]],
"expected": ["<footer>"]
},
{"description": "p end-tag followed by form start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "form", {}]],
"expected": ["<form>"]
},
{"description": "p end-tag followed by h1 start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h1", {}]],
"expected": ["<h1>"]
},
{"description": "p end-tag followed by h2 start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h2", {}]],
"expected": ["<h2>"]
},
{"description": "p end-tag followed by h3 start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h3", {}]],
"expected": ["<h3>"]
},
{"description": "p end-tag followed by h4 start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h4", {}]],
"expected": ["<h4>"]
},
{"description": "p end-tag followed by h5 start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h5", {}]],
"expected": ["<h5>"]
},
{"description": "p end-tag followed by h6 start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h6", {}]],
"expected": ["<h6>"]
},
{"description": "p end-tag followed by header start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "header", {}]],
"expected": ["<header>"]
},
{"description": "p end-tag followed by hr empty-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["EmptyTag", "hr", {}]],
"expected": ["<hr>"]
},
{"description": "p end-tag followed by menu start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "menu", {}]],
"expected": ["<menu>"]
},
{"description": "p end-tag followed by nav start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "nav", {}]],
"expected": ["<nav>"]
},
{"description": "p end-tag followed by ol start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "ol", {}]],
"expected": ["<ol>"]
},
{"description": "p end-tag followed by p start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "p", {}]],
"expected": ["<p>"]
},
{"description": "p end-tag followed by pre start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "pre", {}]],
"expected": ["<pre>"]
},
{"description": "p end-tag followed by section start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "section", {}]],
"expected": ["<section>"]
},
{"description": "p end-tag followed by table start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "table", {}]],
"expected": ["<table>"]
},
{"description": "p end-tag followed by ul start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "ul", {}]],
"expected": ["<ul>"]
},
{"description": "p end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "p end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"]],
"expected": [""]
},
{"description": "optgroup end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["Comment", "foo"]],
"expected": ["</optgroup><!--foo-->"]
},
{"description": "optgroup end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["Characters", " foo"]],
"expected": ["</optgroup> foo"]
},
{"description": "optgroup end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["Characters", "foo"]],
"expected": ["</optgroup>foo"]
},
{"description": "optgroup end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</optgroup><foo>"]
},
{"description": "optgroup end-tag followed by optgroup start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "optgroup", {}]],
"expected": ["<optgroup>"]
},
{"description": "optgroup end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "optgroup end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"]],
"expected": [""]
},
{"description": "option end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["Comment", "foo"]],
"expected": ["</option><!--foo-->"]
},
{"description": "option end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["Characters", " foo"]],
"expected": ["</option> foo"]
},
{"description": "option end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["Characters", "foo"]],
"expected": ["</option>foo"]
},
{"description": "option end-tag followed by optgroup start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["StartTag", "http://www.w3.org/1999/xhtml", "optgroup", {}]],
"expected": ["<optgroup>"]
},
{"description": "option end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</option><foo>"]
},
{"description": "option end-tag followed by option start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["StartTag", "http://www.w3.org/1999/xhtml", "option", {}]],
"expected": ["<option>"]
},
{"description": "option end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "option end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"]],
"expected": [""]
},
{"description": "colgroup start-tag followed by comment",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["Comment", "foo"]],
"expected": ["<colgroup><!--foo-->"]
},
{"description": "colgroup start-tag followed by space character",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["Characters", " foo"]],
"expected": ["<colgroup> foo"]
},
{"description": "colgroup start-tag followed by text",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["Characters", "foo"]],
"expected": ["<colgroup>foo"]
},
{"description": "colgroup start-tag followed by start-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<colgroup><foo>"]
},
{"description": "first colgroup in a table with a col child",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "table", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["EmptyTag", "col", {}]],
"expected": ["<table><col>"]
},
{"description": "colgroup with a col child, following another colgroup",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "col", {}]],
"expected": ["</colgroup><col>", "<colgroup><col>"]
},
{"description": "colgroup start-tag followed by end-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["<colgroup></foo>"]
},
{"description": "colgroup start-tag at EOF",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}]],
"expected": ["<colgroup>"]
},
{"description": "colgroup end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["Comment", "foo"]],
"expected": ["</colgroup><!--foo-->"]
},
{"description": "colgroup end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["Characters", " foo"]],
"expected": ["</colgroup> foo"]
},
{"description": "colgroup end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["Characters", "foo"]],
"expected": ["foo"]
},
{"description": "colgroup end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "colgroup end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "colgroup end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"]],
"expected": [""]
},
{"description": "thead end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["Comment", "foo"]],
"expected": ["</thead><!--foo-->"]
},
{"description": "thead end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["Characters", " foo"]],
"expected": ["</thead> foo"]
},
{"description": "thead end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["Characters", "foo"]],
"expected": ["</thead>foo"]
},
{"description": "thead end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</thead><foo>"]
},
{"description": "thead end-tag followed by tbody start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
"expected": ["<tbody>"]
},
{"description": "thead end-tag followed by tfoot start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "tfoot", {}]],
"expected": ["<tfoot>"]
},
{"description": "thead end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</thead></foo>"]
},
{"description": "thead end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"]],
"expected": ["</thead>"]
},
{"description": "tbody start-tag followed by comment",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["Comment", "foo"]],
"expected": ["<tbody><!--foo-->"]
},
{"description": "tbody start-tag followed by space character",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["Characters", " foo"]],
"expected": ["<tbody> foo"]
},
{"description": "tbody start-tag followed by text",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["Characters", "foo"]],
"expected": ["<tbody>foo"]
},
{"description": "tbody start-tag followed by start-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<tbody><foo>"]
},
{"description": "first tbody in a table with a tr child",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "table", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
"expected": ["<table><tr>"]
},
{"description": "tbody with a tr child, following another tbody",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
"expected": ["<tbody><tr>", "</tbody><tr>"]
},
{"description": "tbody with a tr child, following a thead",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
"expected": ["<tbody><tr>", "</thead><tr>"]
},
{"description": "tbody with a tr child, following a tfoot",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
"expected": ["<tbody><tr>", "</tfoot><tr>"]
},
{"description": "tbody start-tag followed by end-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["<tbody></foo>"]
},
{"description": "tbody start-tag at EOF",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
"expected": ["<tbody>"]
},
{"description": "tbody end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["Comment", "foo"]],
"expected": ["</tbody><!--foo-->"]
},
{"description": "tbody end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["Characters", " foo"]],
"expected": ["</tbody> foo"]
},
{"description": "tbody end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["Characters", "foo"]],
"expected": ["</tbody>foo"]
},
{"description": "tbody end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</tbody><foo>"]
},
{"description": "tbody end-tag followed by tbody start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
"expected": ["<tbody>", "</tbody>"]
},
{"description": "tbody end-tag followed by tfoot start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "tfoot", {}]],
"expected": ["<tfoot>"]
},
{"description": "tbody end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "tbody end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"]],
"expected": [""]
},
{"description": "tfoot end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["Comment", "foo"]],
"expected": ["</tfoot><!--foo-->"]
},
{"description": "tfoot end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["Characters", " foo"]],
"expected": ["</tfoot> foo"]
},
{"description": "tfoot end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["Characters", "foo"]],
"expected": ["</tfoot>foo"]
},
{"description": "tfoot end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</tfoot><foo>"]
},
{"description": "tfoot end-tag followed by tbody start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
"expected": ["<tbody>", "</tfoot>"]
},
{"description": "tfoot end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "tfoot end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"]],
"expected": [""]
},
{"description": "tr end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["Comment", "foo"]],
"expected": ["</tr><!--foo-->"]
},
{"description": "tr end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["Characters", " foo"]],
"expected": ["</tr> foo"]
},
{"description": "tr end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["Characters", "foo"]],
"expected": ["</tr>foo"]
},
{"description": "tr end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</tr><foo>"]
},
{"description": "tr end-tag followed by tr start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
"expected": ["<tr>", "</tr>"]
},
{"description": "tr end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "tr end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"]],
"expected": [""]
},
{"description": "td end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["Comment", "foo"]],
"expected": ["</td><!--foo-->"]
},
{"description": "td end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["Characters", " foo"]],
"expected": ["</td> foo"]
},
{"description": "td end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["Characters", "foo"]],
"expected": ["</td>foo"]
},
{"description": "td end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</td><foo>"]
},
{"description": "td end-tag followed by td start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["StartTag", "http://www.w3.org/1999/xhtml", "td", {}]],
"expected": ["<td>", "</td>"]
},
{"description": "td end-tag followed by th start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["StartTag", "http://www.w3.org/1999/xhtml", "th", {}]],
"expected": ["<th>", "</td>"]
},
{"description": "td end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "td end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"]],
"expected": [""]
},
{"description": "th end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["Comment", "foo"]],
"expected": ["</th><!--foo-->"]
},
{"description": "th end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["Characters", " foo"]],
"expected": ["</th> foo"]
},
{"description": "th end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["Characters", "foo"]],
"expected": ["</th>foo"]
},
{"description": "th end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</th><foo>"]
},
{"description": "th end-tag followed by th start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["StartTag", "http://www.w3.org/1999/xhtml", "th", {}]],
"expected": ["<th>", "</th>"]
},
{"description": "th end-tag followed by td start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["StartTag", "http://www.w3.org/1999/xhtml", "td", {}]],
"expected": ["<td>", "</th>"]
},
{"description": "th end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "th end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml" , "th"]],
"expected": [""]
}
]}

View File

@@ -1,60 +0,0 @@
{"tests":[
{"description": "quote_char=\"'\"",
"options": {"quote_char": "'"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "test 'with' quote_char"}]]],
"expected": ["<span title='test &#39;with&#39; quote_char'>"]
},
{"description": "quote_attr_values=true",
"options": {"quote_attr_values": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "button", [{"namespace": null, "name": "disabled", "value" :"disabled"}]]],
"expected": ["<button disabled>"],
"xhtml": ["<button disabled=\"disabled\">"]
},
{"description": "quote_attr_values=true with irrelevant",
"options": {"quote_attr_values": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :"irrelevant"}]]],
"expected": ["<div irrelevant>"],
"xhtml": ["<div irrelevant=\"irrelevant\">"]
},
{"description": "use_trailing_solidus=true with void element",
"options": {"use_trailing_solidus": true},
"input": [["EmptyTag", "img", {}]],
"expected": ["<img />"]
},
{"description": "use_trailing_solidus=true with non-void element",
"options": {"use_trailing_solidus": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", {}]],
"expected": ["<div>"]
},
{"description": "minimize_boolean_attributes=false",
"options": {"minimize_boolean_attributes": false},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :"irrelevant"}]]],
"expected": ["<div irrelevant=irrelevant>"],
"xhtml": ["<div irrelevant=\"irrelevant\">"]
},
{"description": "minimize_boolean_attributes=false with empty value",
"options": {"minimize_boolean_attributes": false},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :""}]]],
"expected": ["<div irrelevant=\"\">"]
},
{"description": "escape less than signs in attribute values",
"options": {"escape_lt_in_attrs": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "a", [{"namespace": null, "name": "title", "value": "a<b>c&d"}]]],
"expected": ["<a title=\"a&lt;b>c&amp;d\">"]
},
{"description": "rcdata",
"options": {"escape_rcdata": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "a<b>c&d"]],
"expected": ["<script>a&lt;b&gt;c&amp;d"]
}
]}

View File

@@ -1,51 +0,0 @@
{"tests": [
{"description": "bare text with leading spaces",
"options": {"strip_whitespace": true},
"input": [["Characters", "\t\r\n\u000C foo"]],
"expected": [" foo"]
},
{"description": "bare text with trailing spaces",
"options": {"strip_whitespace": true},
"input": [["Characters", "foo \t\r\n\u000C"]],
"expected": ["foo "]
},
{"description": "bare text with inner spaces",
"options": {"strip_whitespace": true},
"input": [["Characters", "foo \t\r\n\u000C bar"]],
"expected": ["foo bar"]
},
{"description": "text within <pre>",
"options": {"strip_whitespace": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "pre", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "pre"]],
"expected": ["<pre>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</pre>"]
},
{"description": "text within <pre>, with inner markup",
"options": {"strip_whitespace": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "pre", {}], ["Characters", "\t\r\n\u000C fo"], ["StartTag", "http://www.w3.org/1999/xhtml", "span", {}], ["Characters", "o \t\r\n\u000C b"], ["EndTag", "http://www.w3.org/1999/xhtml", "span"], ["Characters", "ar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "pre"]],
"expected": ["<pre>\t\r\n\u000C fo<span>o \t\r\n\u000C b</span>ar \t\r\n\u000C</pre>"]
},
{"description": "text within <textarea>",
"options": {"strip_whitespace": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "textarea", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "textarea"]],
"expected": ["<textarea>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</textarea>"]
},
{"description": "text within <script>",
"options": {"strip_whitespace": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "script"]],
"expected": ["<script>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</script>"]
},
{"description": "text within <style>",
"options": {"strip_whitespace": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "style"]],
"expected": ["<style>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</style>"]
}
]}

View File

@@ -1,43 +0,0 @@
[
{"type": "text/html", "input": ""},
{"type": "text/html", "input": "<!---->"},
{"type": "text/html", "input": "<!--asdfaslkjdf;laksjdf as;dkfjsd-->"},
{"type": "text/html", "input": "<!"},
{"type": "text/html", "input": "\t"},
{"type": "text/html", "input": "<!>"},
{"type": "text/html", "input": "<?"},
{"type": "text/html", "input": "<??>"},
{"type": "application/rss+xml", "input": "<rss"},
{"type": "application/atom+xml", "input": "<feed"},
{"type": "text/html", "input": "<html"},
{"type": "text/html", "input": "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n<html><head>\n<title>302 Found</title>\n</head><body>\n<h1>Found</h1>\n<p>The document has moved <a href=\"http://feeds.feedburner.com/gofug\">here</a>.</p>\n</body></html>\n"},
{"type": "text/html", "input": "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">\r\n<HTML><HEAD>\r\n <link rel=\"stylesheet\" type=\"text/css\" href=\"http://cache.blogads.com/289619328/feed.css\" /><link rel=\"stylesheet\" type=\"text/css\" href=\"http://cache.blogads.com/431602649/feed.css\" />\r\n<link rel=\"stylesheet\" type=\"text/css\" href=\"http://cache.blogads.com/382549546/feed.css\" />\r\n<link rel=\"stylesheet\" type=\"text/css\" href=\"http://cache.blogads.com/314618017/feed.css\" /><META http-equiv=\"expires\" content="},
{"type": "text/html", "input": "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\r\n<html>\r\n<head>\r\n<title>Xiaxue - Chicken pie blogger.</title><meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\"><style type=\"text/css\">\r\n<style type=\"text/css\">\r\n<!--\r\nbody {\r\n background-color: #FFF2F2;\r\n}\r\n.style1 {font-family: Georgia, \"Times New Roman\", Times, serif}\r\n.style2 {\r\n color: #8a567c;\r\n font-size: 14px;\r\n font-family: Georgia, \"Times New Roman\", Times, serif;\r\n}\r"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\r\n<head> \r\n<title>Google Operating System</title>\r\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\r\n<meta name=\"Description\" content=\"Unofficial news and tips about Google. A blog that watches Google's latest developments and the attempts to move your operating system online.\" />\r\n<meta name=\"generator\" c"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\r\n<head>\r\n <title>Assimilated Press</title> <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\r\n<meta name=\"MSSmartTagsPreventParsing\" content=\"true\" />\r\n<meta name=\"generator\" content=\"Blogger\" />\r\n<link rel=\"alternate\" type=\"application/atom+xml\" title=\"Assimilated Press - Atom\" href=\"http://assimila"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\r\n<head>\r\n <title>PostSecret</title>\r\n<META name=\"keywords\" Content=\"secrets, postcard, secret, postcards, postsecret, postsecrets,online confessional, post secret, post secrets, artomatic, post a secret\"><META name=\"discription\" Content=\"See a Secret...Share a Secret\"> <meta http-equiv=\"Content-Type\" content=\"te"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns='http://www.w3.org/1999/xhtml' xmlns:b='http://www.google.com/2005/gml/b' xmlns:data='http://www.google.com/2005/gml/data' xmlns:expr='http://www.google.com/2005/gml/expr'>\n <head>\n \n <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'/>\n <meta content='true' name='MSSmartTagsPreventParsing'/>\n <meta content='blogger' name='generator'/>\n <link rel=\"alternate\" typ"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"ja\">\n<head profile=\"http://gmpg.org/xfn/11\"> \n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" /> \n<title> CMS Lever</title><link rel=\"stylesheet\" type=\"text/css\" media=\"screen\" href=\"http://s.wordpress.com/wp-content/themes/pub/twenty-eight/2813.css\"/>\n<link rel=\"alternate\" type=\"application/rss+xml\" title=\"RSS 2.0\" h"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"en\"><head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n<title> Park Avenue Peerage</title>\t<meta name=\"generator\" content=\"WordPress.com\" />\t<!-- feeds -->\n\t<link rel=\"alternate\" type=\"application/rss+xml\" title=\"RSS 2.0\" href=\"http://parkavenuepeerage.wordpress.com/feed/\" />\t<link rel=\"pingback\" href="},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"ja\"><head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n<title> \u884c\u96f2\u6d41\u6c34 -like a floating clouds and running water-</title>\t<meta name=\"generator\" content=\"WordPress.com\" />\t<!-- feeds -->\n\t<link rel=\"alternate\" type=\"application/rss+xml\" title=\"RSS 2.0\" href=\"http://shw4.wordpress.com/feed/\" />\t<li"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\">\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n<meta name=\"generator\" content=\"http://www.typepad.com/\" />\n<title>Go Fug Yourself</title><link rel=\"stylesheet\" href=\"http://gofugyourself.typepad.com/go_fug_yourself/styles.css\" type=\"text/css\" />\n<link rel=\"alternate\" type=\"application/atom+xml\" title=\"Atom\" "},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"en\"><head profile=\"http://gmpg.org/xfn/11\">\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" /><title> Ladies&#8230;</title><meta name=\"generator\" content=\"WordPress.com\" /> <!-- leave this for stats --><link rel=\"stylesheet\" href=\"http://s.wordpress.com/wp-content/themes/default/style.css?1\" type=\"tex"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\r\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\r\n<head>\r\n <title>The Sartorialist</title> <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\r\n<meta name=\"MSSmartTagsPreventParsing\" content=\"true\" />\r\n<meta name=\"generator\" content=\"Blogger\" />\r\n<link rel=\"alternate\" type=\"application/atom+xml\" title=\"The Sartorialist - Atom\" href=\"http://thesartorialist.blogspot"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\">\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-1\" />\n<meta name=\"generator\" content=\"http://www.typepad.com/\" />\n<title>Creating Passionate Users</title><link rel=\"stylesheet\" href=\"http://headrush.typepad.com/creating_passionate_users/styles.css\" type=\"text/css\" />\n<link rel=\"alternate\" type"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n\t\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" id=\"sixapart-standard\">\n<head>\n\t<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n\t<meta name=\"generator\" content=\"http://www.typepad.com/\" />\n\t\n\t\n <meta name=\"keywords\" content=\"marketing, blog, seth, ideas, respect, permission\" />\n <meta name=\"description\" content=\"Seth Godin's riffs on marketing, respect, and the "},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n\t\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" id=\"sixapart-standard\">\n<head>\n\t<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n\t<meta name=\"generator\" content=\"http://www.typepad.com/\" />\n\t\n\t\n \n <meta name=\"description\" content=\" Western Civilization hangs in the balance. This blog is part of the solution,the cure. Get your heads out of the sand and Fight the G"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"en\">\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=pahrefhttpwwwfeedburnercomtarget_blankimgsrchttpwwwfeedburnercomfbimagespubpowered_by_fbgifaltPoweredbyFeedBurnerstyleborder0ap\" />\n<title> From Under the Rotunda</title>\n<link rel=\"stylesheet\" href=\"http://s.wordpress.com/wp-content/themes/pub/andreas04/style.css\" type=\"text/css\""},
{"type": "application/atom+xml", "input": "<?xml version='1.0' encoding='UTF-8'?><?xml-stylesheet href=\"http://www.blogger.com/styles/atom.css\" type=\"text/css\"?><feed xmlns='http://www.w3.org/2005/Atom' xmlns:openSearch='http://a9.com/-/spec/opensearchrss/1.0/'><id>tag:blogger.com,1999:blog-10861780</id><updated>2007-07-27T12:38:50.888-07:00</updated><title type='text'>Official Google Blog</title><link rel='alternate' type='text/html' href='http://googleblog.blogspot.com/'/><link rel='next' type='application/atom+xml' href='http://googleblog.blogs"},
{"type": "application/rss+xml", "input": "<?xml version='1.0' encoding='UTF-8'?><rss xmlns:atom='http://www.w3.org/2005/Atom' xmlns:openSearch='http://a9.com/-/spec/opensearchrss/1.0/' version='2.0'><channel><atom:id>tag:blogger.com,1999:blog-10861780</atom:id><lastBuildDate>Fri, 27 Jul 2007 19:38:50 +0000</lastBuildDate><title>Official Google Blog</title><description/><link>http://googleblog.blogspot.com/</link><managingEditor>Eric Case</managingEditor><generator>Blogger</generator><openSearch:totalResults>729</openSearch:totalResults><openSearc"},
{"type": "application/rss+xml", "input": "<?xml version=\"1.0\" encoding=\"pahrefhttpwwwfeedburnercomtarget_blankimgsrchttpwwwfeedburnercomfbimagespubpowered_by_fbgifaltPoweredbyFeedBurnerstyleborder0ap\"?>\n<!-- generator=\"wordpress/MU\" -->\n<rss version=\"2.0\"\n\txmlns:content=\"http://purl.org/rss/1.0/modules/content/\"\n\txmlns:wfw=\"http://wellformedweb.org/CommentAPI/\"\n\txmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n\t><channel>\n\t<title>From Under the Rotunda</title>\n\t<link>http://dannybernardi.wordpress.com</link>\n\t<description>The Monographs of Danny Ber"},
{"type": "application/rss+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!-- generator=\"wordpress/MU\" -->\n<rss version=\"2.0\"\n\txmlns:content=\"http://purl.org/rss/1.0/modules/content/\"\n\txmlns:wfw=\"http://wellformedweb.org/CommentAPI/\"\n\txmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n\t><channel>\n\t<title>CMS Lever</title>\n\t<link>http://kanaguri.wordpress.com</link>\n\t<description>CMS\u306e\u6c17\u306b\u306a\u3063\u305f\u3053\u3068</description>\n\t<pubDate>Wed, 18 Jul 2007 21:26:22 +0000</pubDate>\n\t<generator>http://wordpress.org/?v=MU</generator>\n\t<language>ja</languag"},
{"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<feed xmlns=\"http://www.w3.org/2005/Atom\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:thr=\"http://purl.org/syndication/thread/1.0\">\n <title>Atlas Shrugs</title>\n <link rel=\"self\" type=\"application/atom+xml\" href=\"http://atlasshrugs2000.typepad.com/atlas_shrugs/atom.xml\" />\n <link rel=\"alternate\" type=\"text/html\" href=\"http://atlasshrugs2000.typepad.com/atlas_shrugs/\" />\n <id>tag:typepad.com,2003:weblog-132946</id>\n <updated>2007-08-15T16:07:34-04"},
{"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/atom10full.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><feed xmlns=\"http://www.w3.org/2005/Atom\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:thr=\"http://purl.org/syndication/thread/1.0\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\">\r\n <title>Creating Passionate Users</title>\r\n "},
{"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/atom10full.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><feed xmlns=\"http://www.w3.org/2005/Atom\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\">\r\n <title>Seth's Blog</title>\r\n <link rel=\"alternate\" type=\"text/html\" href=\"http://sethgodin.typepad.com/seths_blog/\" />\r\n <link rel=\"s"},
{"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/atom10full.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><feed xmlns=\"http://www.w3.org/2005/Atom\" xmlns:openSearch=\"http://a9.com/-/spec/opensearchrss/1.0/\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\"><id>tag:blogger.com,1999:blog-32454861</id><updated>2007-07-31T21:44:09.867+02:00</upd"},
{"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/atomfull.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><feed xmlns=\"http://purl.org/atom/ns#\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\" version=\"0.3\">\r\n <title>Go Fug Yourself</title>\r\n <link rel=\"alternate\" type=\"text/html\" href=\"http://go"},
{"type": "application/rss+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/rss2full.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><rss xmlns:creativeCommons=\"http://backend.userland.com/creativeCommonsRssModule\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\" version=\"2.0\"><channel><title>Google Operating System</title><link>http://googlesystem.blogspot.com/</link>"},
{"type": "application/rss+xml", "input": "<?xml version=\"1.0\" encoding=\"\"?>\n<!-- generator=\"wordpress/MU\" -->\n<rss version=\"2.0\"\n\txmlns:content=\"http://purl.org/rss/1.0/modules/content/\"\n\txmlns:wfw=\"http://wellformedweb.org/CommentAPI/\"\n\txmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n\t><channel>\n\t<title>Nunublog</title>\n\t<link>http://nunubh.wordpress.com</link>\n\t<description>Just Newbie Blog!</description>\n\t<pubDate>Mon, 09 Jul 2007 18:54:09 +0000</pubDate>\n\t<generator>http://wordpress.org/?v=MU</generator>\n\t<language>id</language>\n\t\t\t<item>\n\t\t<ti"},
{"type": "text/html", "input": "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\r\n<HEAD>\r\n<TITLE>Design*Sponge</TITLE><meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\r\n<meta name=\"MSSmartTagsPreventParsing\" content=\"true\" />\r\n<meta name=\"generator\" content=\"Blogger\" />\r\n<link rel=\"alternate\" type=\"application/atom+xml\" title=\"Design*Sponge - Atom\" href=\"http://designsponge.blogspot.com/feeds/posts/default\" />\r\n<link rel=\"alternate\" type=\"application/rss+xml\" title=\"Design*Sponge - RSS\" href="},
{"type": "text/html", "input": "<HTML>\n<HEAD>\n<TITLE>Moved Temporarily</TITLE>\n</HEAD>\n<BODY BGCOLOR=\"#FFFFFF\" TEXT=\"#000000\">\n<H1>Moved Temporarily</H1>\nThe document has moved <A HREF=\"http://feeds.feedburner.com/thesecretdiaryofstevejobs\">here</A>.\n</BODY>\n</HTML>\n"}
]

View File

@@ -1,75 +0,0 @@
{"tests": [
{"description":"PLAINTEXT content model flag",
"initialStates":["PLAINTEXT state"],
"lastStartTag":"plaintext",
"input":"<head>&body;",
"output":[["Character", "<head>&body;"]]},
{"description":"End tag closing RCDATA or RAWTEXT",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xmp>",
"output":[["Character", "foo"], ["EndTag", "xmp"]]},
{"description":"End tag closing RCDATA or RAWTEXT (case-insensitivity)",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xMp>",
"output":[["Character", "foo"], ["EndTag", "xmp"]]},
{"description":"End tag closing RCDATA or RAWTEXT (ending with space)",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xmp ",
"output":[["Character", "foo"], "ParseError"]},
{"description":"End tag closing RCDATA or RAWTEXT (ending with EOF)",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xmp",
"output":[["Character", "foo</xmp"]]},
{"description":"End tag closing RCDATA or RAWTEXT (ending with slash)",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xmp/",
"output":[["Character", "foo"], "ParseError"]},
{"description":"End tag not closing RCDATA or RAWTEXT (ending with left-angle-bracket)",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xmp<",
"output":[["Character", "foo</xmp<"]]},
{"description":"End tag with incorrect name in RCDATA or RAWTEXT",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"</foo>bar</xmp>",
"output":[["Character", "</foo>bar"], ["EndTag", "xmp"]]},
{"description":"End tag with incorrect name in RCDATA or RAWTEXT (starting like correct name)",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"</foo>bar</xmpaar>",
"output":[["Character", "</foo>bar</xmpaar>"]]},
{"description":"End tag closing RCDATA or RAWTEXT, switching back to PCDATA",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xmp></baz>",
"output":[["Character", "foo"], ["EndTag", "xmp"], ["EndTag", "baz"]]},
{"description":"RAWTEXT w/ something looking like an entity",
"initialStates":["RAWTEXT state"],
"lastStartTag":"xmp",
"input":"&foo;",
"output":[["Character", "&foo;"]]},
{"description":"RCDATA w/ an entity",
"initialStates":["RCDATA state"],
"lastStartTag":"textarea",
"input":"&lt;",
"output":[["Character", "<"]]}
]}

View File

@@ -1,90 +0,0 @@
{
"tests": [
{
"description":"CR in bogus comment state",
"input":"<?\u000d",
"output":["ParseError", ["Comment", "?\u000a"]]
},
{
"description":"CRLF in bogus comment state",
"input":"<?\u000d\u000a",
"output":["ParseError", ["Comment", "?\u000a"]]
},
{
"description":"NUL in RCDATA and RAWTEXT",
"doubleEscaped":true,
"initialStates":["RCDATA state", "RAWTEXT state"],
"input":"\\u0000",
"output":["ParseError", ["Character", "\\uFFFD"]]
},
{
"description":"skip first BOM but not later ones",
"input":"\uFEFFfoo\uFEFFbar",
"output":[["Character", "foo\uFEFFbar"]]
},
{
"description":"Non BMP-charref in in RCDATA",
"initialStates":["RCDATA state"],
"input":"&NotEqualTilde;",
"output":[["Character", "\u2242\u0338"]]
},
{
"description":"Bad charref in in RCDATA",
"initialStates":["RCDATA state"],
"input":"&NotEqualTild;",
"output":["ParseError", ["Character", "&NotEqualTild;"]]
},
{
"description":"lowercase endtags in RCDATA and RAWTEXT",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"</XMP>",
"output":[["EndTag","xmp"]]
},
{
"description":"bad endtag in RCDATA and RAWTEXT",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"</ XMP>",
"output":[["Character","</ XMP>"]]
},
{
"description":"bad endtag in RCDATA and RAWTEXT",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"</xm>",
"output":[["Character","</xm>"]]
},
{
"description":"bad endtag in RCDATA and RAWTEXT",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"</xm ",
"output":[["Character","</xm "]]
},
{
"description":"bad endtag in RCDATA and RAWTEXT",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"</xm/",
"output":[["Character","</xm/"]]
},
{
"description":"Non BMP-charref in attribute",
"input":"<p id=\"&NotEqualTilde;\">",
"output":[["StartTag", "p", {"id":"\u2242\u0338"}]]
},
{
"description":"--!NUL in comment ",
"doubleEscaped":true,
"input":"<!----!\\u0000-->",
"output":["ParseError", ["Comment", "--!\\uFFFD"]]
},
{
"description":"space EOF after doctype ",
"input":"<!DOCTYPE html ",
"output":["ParseError", ["DOCTYPE", "html", null, null , false]]
}
]
}

View File

@@ -1,283 +0,0 @@
{"tests": [
{"description": "Undefined named entity in attribute value ending in semicolon and whose name starts with a known entity name.",
"input":"<h a='&noti;'>",
"output": ["ParseError", ["StartTag", "h", {"a": "&noti;"}]]},
{"description": "Entity name followed by the equals sign in an attribute value.",
"input":"<h a='&lang='>",
"output": ["ParseError", ["StartTag", "h", {"a": "&lang="}]]},
{"description": "CR as numeric entity",
"input":"&#013;",
"output": ["ParseError", ["Character", "\r"]]},
{"description": "CR as hexadecimal numeric entity",
"input":"&#x00D;",
"output": ["ParseError", ["Character", "\r"]]},
{"description": "Windows-1252 EURO SIGN numeric entity.",
"input":"&#0128;",
"output": ["ParseError", ["Character", "\u20AC"]]},
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"&#0129;",
"output": ["ParseError", ["Character", "\u0081"]]},
{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK numeric entity.",
"input":"&#0130;",
"output": ["ParseError", ["Character", "\u201A"]]},
{"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK numeric entity.",
"input":"&#0131;",
"output": ["ParseError", ["Character", "\u0192"]]},
{"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK numeric entity.",
"input":"&#0132;",
"output": ["ParseError", ["Character", "\u201E"]]},
{"description": "Windows-1252 HORIZONTAL ELLIPSIS numeric entity.",
"input":"&#0133;",
"output": ["ParseError", ["Character", "\u2026"]]},
{"description": "Windows-1252 DAGGER numeric entity.",
"input":"&#0134;",
"output": ["ParseError", ["Character", "\u2020"]]},
{"description": "Windows-1252 DOUBLE DAGGER numeric entity.",
"input":"&#0135;",
"output": ["ParseError", ["Character", "\u2021"]]},
{"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT numeric entity.",
"input":"&#0136;",
"output": ["ParseError", ["Character", "\u02C6"]]},
{"description": "Windows-1252 PER MILLE SIGN numeric entity.",
"input":"&#0137;",
"output": ["ParseError", ["Character", "\u2030"]]},
{"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON numeric entity.",
"input":"&#0138;",
"output": ["ParseError", ["Character", "\u0160"]]},
{"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK numeric entity.",
"input":"&#0139;",
"output": ["ParseError", ["Character", "\u2039"]]},
{"description": "Windows-1252 LATIN CAPITAL LIGATURE OE numeric entity.",
"input":"&#0140;",
"output": ["ParseError", ["Character", "\u0152"]]},
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"&#0141;",
"output": ["ParseError", ["Character", "\u008D"]]},
{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON numeric entity.",
"input":"&#0142;",
"output": ["ParseError", ["Character", "\u017D"]]},
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"&#0143;",
"output": ["ParseError", ["Character", "\u008F"]]},
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"&#0144;",
"output": ["ParseError", ["Character", "\u0090"]]},
{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK numeric entity.",
"input":"&#0145;",
"output": ["ParseError", ["Character", "\u2018"]]},
{"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK numeric entity.",
"input":"&#0146;",
"output": ["ParseError", ["Character", "\u2019"]]},
{"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK numeric entity.",
"input":"&#0147;",
"output": ["ParseError", ["Character", "\u201C"]]},
{"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK numeric entity.",
"input":"&#0148;",
"output": ["ParseError", ["Character", "\u201D"]]},
{"description": "Windows-1252 BULLET numeric entity.",
"input":"&#0149;",
"output": ["ParseError", ["Character", "\u2022"]]},
{"description": "Windows-1252 EN DASH numeric entity.",
"input":"&#0150;",
"output": ["ParseError", ["Character", "\u2013"]]},
{"description": "Windows-1252 EM DASH numeric entity.",
"input":"&#0151;",
"output": ["ParseError", ["Character", "\u2014"]]},
{"description": "Windows-1252 SMALL TILDE numeric entity.",
"input":"&#0152;",
"output": ["ParseError", ["Character", "\u02DC"]]},
{"description": "Windows-1252 TRADE MARK SIGN numeric entity.",
"input":"&#0153;",
"output": ["ParseError", ["Character", "\u2122"]]},
{"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON numeric entity.",
"input":"&#0154;",
"output": ["ParseError", ["Character", "\u0161"]]},
{"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK numeric entity.",
"input":"&#0155;",
"output": ["ParseError", ["Character", "\u203A"]]},
{"description": "Windows-1252 LATIN SMALL LIGATURE OE numeric entity.",
"input":"&#0156;",
"output": ["ParseError", ["Character", "\u0153"]]},
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"&#0157;",
"output": ["ParseError", ["Character", "\u009D"]]},
{"description": "Windows-1252 EURO SIGN hexadecimal numeric entity.",
"input":"&#x080;",
"output": ["ParseError", ["Character", "\u20AC"]]},
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"&#x081;",
"output": ["ParseError", ["Character", "\u0081"]]},
{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x082;",
"output": ["ParseError", ["Character", "\u201A"]]},
{"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK hexadecimal numeric entity.",
"input":"&#x083;",
"output": ["ParseError", ["Character", "\u0192"]]},
{"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x084;",
"output": ["ParseError", ["Character", "\u201E"]]},
{"description": "Windows-1252 HORIZONTAL ELLIPSIS hexadecimal numeric entity.",
"input":"&#x085;",
"output": ["ParseError", ["Character", "\u2026"]]},
{"description": "Windows-1252 DAGGER hexadecimal numeric entity.",
"input":"&#x086;",
"output": ["ParseError", ["Character", "\u2020"]]},
{"description": "Windows-1252 DOUBLE DAGGER hexadecimal numeric entity.",
"input":"&#x087;",
"output": ["ParseError", ["Character", "\u2021"]]},
{"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT hexadecimal numeric entity.",
"input":"&#x088;",
"output": ["ParseError", ["Character", "\u02C6"]]},
{"description": "Windows-1252 PER MILLE SIGN hexadecimal numeric entity.",
"input":"&#x089;",
"output": ["ParseError", ["Character", "\u2030"]]},
{"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON hexadecimal numeric entity.",
"input":"&#x08A;",
"output": ["ParseError", ["Character", "\u0160"]]},
{"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x08B;",
"output": ["ParseError", ["Character", "\u2039"]]},
{"description": "Windows-1252 LATIN CAPITAL LIGATURE OE hexadecimal numeric entity.",
"input":"&#x08C;",
"output": ["ParseError", ["Character", "\u0152"]]},
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"&#x08D;",
"output": ["ParseError", ["Character", "\u008D"]]},
{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON hexadecimal numeric entity.",
"input":"&#x08E;",
"output": ["ParseError", ["Character", "\u017D"]]},
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"&#x08F;",
"output": ["ParseError", ["Character", "\u008F"]]},
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"&#x090;",
"output": ["ParseError", ["Character", "\u0090"]]},
{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x091;",
"output": ["ParseError", ["Character", "\u2018"]]},
{"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x092;",
"output": ["ParseError", ["Character", "\u2019"]]},
{"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x093;",
"output": ["ParseError", ["Character", "\u201C"]]},
{"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x094;",
"output": ["ParseError", ["Character", "\u201D"]]},
{"description": "Windows-1252 BULLET hexadecimal numeric entity.",
"input":"&#x095;",
"output": ["ParseError", ["Character", "\u2022"]]},
{"description": "Windows-1252 EN DASH hexadecimal numeric entity.",
"input":"&#x096;",
"output": ["ParseError", ["Character", "\u2013"]]},
{"description": "Windows-1252 EM DASH hexadecimal numeric entity.",
"input":"&#x097;",
"output": ["ParseError", ["Character", "\u2014"]]},
{"description": "Windows-1252 SMALL TILDE hexadecimal numeric entity.",
"input":"&#x098;",
"output": ["ParseError", ["Character", "\u02DC"]]},
{"description": "Windows-1252 TRADE MARK SIGN hexadecimal numeric entity.",
"input":"&#x099;",
"output": ["ParseError", ["Character", "\u2122"]]},
{"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON hexadecimal numeric entity.",
"input":"&#x09A;",
"output": ["ParseError", ["Character", "\u0161"]]},
{"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x09B;",
"output": ["ParseError", ["Character", "\u203A"]]},
{"description": "Windows-1252 LATIN SMALL LIGATURE OE hexadecimal numeric entity.",
"input":"&#x09C;",
"output": ["ParseError", ["Character", "\u0153"]]},
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"&#x09D;",
"output": ["ParseError", ["Character", "\u009D"]]},
{"description": "Windows-1252 LATIN SMALL LETTER Z WITH CARON hexadecimal numeric entity.",
"input":"&#x09E;",
"output": ["ParseError", ["Character", "\u017E"]]},
{"description": "Windows-1252 LATIN CAPITAL LETTER Y WITH DIAERESIS hexadecimal numeric entity.",
"input":"&#x09F;",
"output": ["ParseError", ["Character", "\u0178"]]},
{"description": "Decimal numeric entity followed by hex character a.",
"input":"&#97a",
"output": ["ParseError", ["Character", "aa"]]},
{"description": "Decimal numeric entity followed by hex character A.",
"input":"&#97A",
"output": ["ParseError", ["Character", "aA"]]},
{"description": "Decimal numeric entity followed by hex character f.",
"input":"&#97f",
"output": ["ParseError", ["Character", "af"]]},
{"description": "Decimal numeric entity followed by hex character A.",
"input":"&#97F",
"output": ["ParseError", ["Character", "aF"]]}
]}

View File

@@ -1,33 +0,0 @@
{"tests": [
{"description":"Commented close tag in RCDATA or RAWTEXT",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo<!--</xmp>--></xmp>",
"output":[["Character", "foo<!--"], ["EndTag", "xmp"], ["Character", "-->"], ["EndTag", "xmp"]]},
{"description":"Bogus comment in RCDATA or RAWTEXT",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo<!-->baz</xmp>",
"output":[["Character", "foo<!-->baz"], ["EndTag", "xmp"]]},
{"description":"End tag surrounded by bogus comment in RCDATA or RAWTEXT",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo<!--></xmp><!-->baz</xmp>",
"output":[["Character", "foo<!-->"], ["EndTag", "xmp"], "ParseError", ["Comment", ""], ["Character", "baz"], ["EndTag", "xmp"]]},
{"description":"Commented entities in RCDATA",
"initialStates":["RCDATA state"],
"lastStartTag":"xmp",
"input":" &amp; <!-- &amp; --> &amp; </xmp>",
"output":[["Character", " & <!-- & --> & "], ["EndTag", "xmp"]]},
{"description":"Incorrect comment ending sequences in RCDATA or RAWTEXT",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo<!-- x --x>x-- >x--!>x--<></xmp>",
"output":[["Character", "foo<!-- x --x>x-- >x--!>x--<>"], ["EndTag", "xmp"]]}
]}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +0,0 @@
{"tests": [
{"description":"<!---- >",
"input":"<!---- >",
"output":["ParseError", "ParseError", ["Comment","-- >"]]}
]}

View File

@@ -1,196 +0,0 @@
{"tests": [
{"description":"Correct Doctype lowercase",
"input":"<!DOCTYPE html>",
"output":[["DOCTYPE", "html", null, null, true]]},
{"description":"Correct Doctype uppercase",
"input":"<!DOCTYPE HTML>",
"output":[["DOCTYPE", "html", null, null, true]]},
{"description":"Correct Doctype mixed case",
"input":"<!DOCTYPE HtMl>",
"output":[["DOCTYPE", "html", null, null, true]]},
{"description":"Correct Doctype case with EOF",
"input":"<!DOCTYPE HtMl",
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
{"description":"Truncated doctype start",
"input":"<!DOC>",
"output":["ParseError", ["Comment", "DOC"]]},
{"description":"Doctype in error",
"input":"<!DOCTYPE foo>",
"output":[["DOCTYPE", "foo", null, null, true]]},
{"description":"Single Start Tag",
"input":"<h>",
"output":[["StartTag", "h", {}]]},
{"description":"Empty end tag",
"input":"</>",
"output":["ParseError"]},
{"description":"Empty start tag",
"input":"<>",
"output":["ParseError", ["Character", "<>"]]},
{"description":"Start Tag w/attribute",
"input":"<h a='b'>",
"output":[["StartTag", "h", {"a":"b"}]]},
{"description":"Start Tag w/attribute no quotes",
"input":"<h a=b>",
"output":[["StartTag", "h", {"a":"b"}]]},
{"description":"Start/End Tag",
"input":"<h></h>",
"output":[["StartTag", "h", {}], ["EndTag", "h"]]},
{"description":"Two unclosed start tags",
"input":"<p>One<p>Two",
"output":[["StartTag", "p", {}], ["Character", "One"], ["StartTag", "p", {}], ["Character", "Two"]]},
{"description":"End Tag w/attribute",
"input":"<h></h a='b'>",
"output":[["StartTag", "h", {}], "ParseError", ["EndTag", "h"]]},
{"description":"Multiple atts",
"input":"<h a='b' c='d'>",
"output":[["StartTag", "h", {"a":"b", "c":"d"}]]},
{"description":"Multiple atts no space",
"input":"<h a='b'c='d'>",
"output":["ParseError", ["StartTag", "h", {"a":"b", "c":"d"}]]},
{"description":"Repeated attr",
"input":"<h a='b' a='d'>",
"output":["ParseError", ["StartTag", "h", {"a":"b"}]]},
{"description":"Simple comment",
"input":"<!--comment-->",
"output":[["Comment", "comment"]]},
{"description":"Comment, Central dash no space",
"input":"<!----->",
"output":["ParseError", ["Comment", "-"]]},
{"description":"Comment, two central dashes",
"input":"<!-- --comment -->",
"output":["ParseError", ["Comment", " --comment "]]},
{"description":"Unfinished comment",
"input":"<!--comment",
"output":["ParseError", ["Comment", "comment"]]},
{"description":"Start of a comment",
"input":"<!-",
"output":["ParseError", ["Comment", "-"]]},
{"description":"Short comment",
"input":"<!-->",
"output":["ParseError", ["Comment", ""]]},
{"description":"Short comment two",
"input":"<!--->",
"output":["ParseError", ["Comment", ""]]},
{"description":"Short comment three",
"input":"<!---->",
"output":[["Comment", ""]]},
{"description":"Ampersand EOF",
"input":"&",
"output":[["Character", "&"]]},
{"description":"Ampersand ampersand EOF",
"input":"&&",
"output":[["Character", "&&"]]},
{"description":"Ampersand space EOF",
"input":"& ",
"output":[["Character", "& "]]},
{"description":"Unfinished entity",
"input":"&f",
"output":["ParseError", ["Character", "&f"]]},
{"description":"Ampersand, number sign",
"input":"&#",
"output":["ParseError", ["Character", "&#"]]},
{"description":"Unfinished numeric entity",
"input":"&#x",
"output":["ParseError", ["Character", "&#x"]]},
{"description":"Entity with trailing semicolon (1)",
"input":"I'm &not;it",
"output":[["Character","I'm \u00ACit"]]},
{"description":"Entity with trailing semicolon (2)",
"input":"I'm &notin;",
"output":[["Character","I'm \u2209"]]},
{"description":"Entity without trailing semicolon (1)",
"input":"I'm &notit",
"output":[["Character","I'm "], "ParseError", ["Character", "\u00ACit"]]},
{"description":"Entity without trailing semicolon (2)",
"input":"I'm &notin",
"output":[["Character","I'm "], "ParseError", ["Character", "\u00ACin"]]},
{"description":"Partial entity match at end of file",
"input":"I'm &no",
"output":[["Character","I'm "], "ParseError", ["Character", "&no"]]},
{"description":"Non-ASCII character reference name",
"input":"&\u00AC;",
"output":["ParseError", ["Character", "&\u00AC;"]]},
{"description":"ASCII decimal entity",
"input":"&#0036;",
"output":[["Character","$"]]},
{"description":"ASCII hexadecimal entity",
"input":"&#x3f;",
"output":[["Character","?"]]},
{"description":"Hexadecimal entity in attribute",
"input":"<h a='&#x3f;'></h>",
"output":[["StartTag", "h", {"a":"?"}], ["EndTag", "h"]]},
{"description":"Entity in attribute without semicolon ending in x",
"input":"<h a='&notx'>",
"output":["ParseError", ["StartTag", "h", {"a":"&notx"}]]},
{"description":"Entity in attribute without semicolon ending in 1",
"input":"<h a='&not1'>",
"output":["ParseError", ["StartTag", "h", {"a":"&not1"}]]},
{"description":"Entity in attribute without semicolon ending in i",
"input":"<h a='&noti'>",
"output":["ParseError", ["StartTag", "h", {"a":"&noti"}]]},
{"description":"Entity in attribute without semicolon",
"input":"<h a='&COPY'>",
"output":["ParseError", ["StartTag", "h", {"a":"\u00A9"}]]},
{"description":"Unquoted attribute ending in ampersand",
"input":"<s o=& t>",
"output":[["StartTag","s",{"o":"&","t":""}]]},
{"description":"Unquoted attribute at end of tag with final character of &, with tag followed by characters",
"input":"<a a=a&>foo",
"output":[["StartTag", "a", {"a":"a&"}], ["Character", "foo"]]},
{"description":"plaintext element",
"input":"<plaintext>foobar",
"output":[["StartTag","plaintext",{}], ["Character","foobar"]]},
{"description":"Open angled bracket in unquoted attribute value state",
"input":"<a a=f<>",
"output":["ParseError", ["StartTag", "a", {"a":"f<"}]]}
]}

View File

@@ -1,179 +0,0 @@
{"tests": [
{"description":"DOCTYPE without name",
"input":"<!DOCTYPE>",
"output":["ParseError", "ParseError", ["DOCTYPE", "", null, null, false]]},
{"description":"DOCTYPE without space before name",
"input":"<!DOCTYPEhtml>",
"output":["ParseError", ["DOCTYPE", "html", null, null, true]]},
{"description":"Incorrect DOCTYPE without a space before name",
"input":"<!DOCTYPEfoo>",
"output":["ParseError", ["DOCTYPE", "foo", null, null, true]]},
{"description":"DOCTYPE with publicId",
"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\">",
"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", null, true]]},
{"description":"DOCTYPE with EOF after PUBLIC",
"input":"<!DOCTYPE html PUBLIC",
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
{"description":"DOCTYPE with EOF after PUBLIC '",
"input":"<!DOCTYPE html PUBLIC '",
"output":["ParseError", ["DOCTYPE", "html", "", null, false]]},
{"description":"DOCTYPE with EOF after PUBLIC 'x",
"input":"<!DOCTYPE html PUBLIC 'x",
"output":["ParseError", ["DOCTYPE", "html", "x", null, false]]},
{"description":"DOCTYPE with systemId",
"input":"<!DOCTYPE html SYSTEM \"-//W3C//DTD HTML Transitional 4.01//EN\">",
"output":[["DOCTYPE", "html", null, "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
{"description":"DOCTYPE with publicId and systemId",
"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\" \"-//W3C//DTD HTML Transitional 4.01//EN\">",
"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
{"description":"DOCTYPE with > in double-quoted publicId",
"input":"<!DOCTYPE html PUBLIC \">x",
"output":["ParseError", ["DOCTYPE", "html", "", null, false], ["Character", "x"]]},
{"description":"DOCTYPE with > in single-quoted publicId",
"input":"<!DOCTYPE html PUBLIC '>x",
"output":["ParseError", ["DOCTYPE", "html", "", null, false], ["Character", "x"]]},
{"description":"DOCTYPE with > in double-quoted systemId",
"input":"<!DOCTYPE html PUBLIC \"foo\" \">x",
"output":["ParseError", ["DOCTYPE", "html", "foo", "", false], ["Character", "x"]]},
{"description":"DOCTYPE with > in single-quoted systemId",
"input":"<!DOCTYPE html PUBLIC 'foo' '>x",
"output":["ParseError", ["DOCTYPE", "html", "foo", "", false], ["Character", "x"]]},
{"description":"Incomplete doctype",
"input":"<!DOCTYPE html ",
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
{"description":"Numeric entity representing the NUL character",
"input":"&#0000;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"Hexadecimal entity representing the NUL character",
"input":"&#x0000;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"Numeric entity representing a codepoint after 1114111 (U+10FFFF)",
"input":"&#2225222;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"Hexadecimal entity representing a codepoint after 1114111 (U+10FFFF)",
"input":"&#x1010FFFF;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"Hexadecimal entity pair representing a surrogate pair",
"input":"&#xD869;&#xDED6;",
"output":["ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"]]},
{"description":"Hexadecimal entity with mixed uppercase and lowercase",
"input":"&#xaBcD;",
"output":[["Character", "\uABCD"]]},
{"description":"Entity without a name",
"input":"&;",
"output":["ParseError", ["Character", "&;"]]},
{"description":"Unescaped ampersand in attribute value",
"input":"<h a='&'>",
"output":[["StartTag", "h", { "a":"&" }]]},
{"description":"StartTag containing <",
"input":"<a<b>",
"output":[["StartTag", "a<b", { }]]},
{"description":"Non-void element containing trailing /",
"input":"<h/>",
"output":[["StartTag","h",{},true]]},
{"description":"Void element with permitted slash",
"input":"<br/>",
"output":[["StartTag","br",{},true]]},
{"description":"Void element with permitted slash (with attribute)",
"input":"<br foo='bar'/>",
"output":[["StartTag","br",{"foo":"bar"},true]]},
{"description":"StartTag containing /",
"input":"<h/a='b'>",
"output":["ParseError", ["StartTag", "h", { "a":"b" }]]},
{"description":"Double-quoted attribute value",
"input":"<h a=\"b\">",
"output":[["StartTag", "h", { "a":"b" }]]},
{"description":"Unescaped </",
"input":"</",
"output":["ParseError", ["Character", "</"]]},
{"description":"Illegal end tag name",
"input":"</1>",
"output":["ParseError", ["Comment", "1"]]},
{"description":"Simili processing instruction",
"input":"<?namespace>",
"output":["ParseError", ["Comment", "?namespace"]]},
{"description":"A bogus comment stops at >, even if preceeded by two dashes",
"input":"<?foo-->",
"output":["ParseError", ["Comment", "?foo--"]]},
{"description":"Unescaped <",
"input":"foo < bar",
"output":[["Character", "foo "], "ParseError", ["Character", "< bar"]]},
{"description":"Null Byte Replacement",
"input":"\u0000",
"output":["ParseError", ["Character", "\u0000"]]},
{"description":"Comment with dash",
"input":"<!---x",
"output":["ParseError", ["Comment", "-x"]]},
{"description":"Entity + newline",
"input":"\nx\n&gt;\n",
"output":[["Character","\nx\n>\n"]]},
{"description":"Start tag with no attributes but space before the greater-than sign",
"input":"<h >",
"output":[["StartTag", "h", {}]]},
{"description":"Empty attribute followed by uppercase attribute",
"input":"<h a B=''>",
"output":[["StartTag", "h", {"a":"", "b":""}]]},
{"description":"Double-quote after attribute name",
"input":"<h a \">",
"output":["ParseError", ["StartTag", "h", {"a":"", "\"":""}]]},
{"description":"Single-quote after attribute name",
"input":"<h a '>",
"output":["ParseError", ["StartTag", "h", {"a":"", "'":""}]]},
{"description":"Empty end tag with following characters",
"input":"a</>bc",
"output":[["Character", "a"], "ParseError", ["Character", "bc"]]},
{"description":"Empty end tag with following tag",
"input":"a</><b>c",
"output":[["Character", "a"], "ParseError", ["StartTag", "b", {}], ["Character", "c"]]},
{"description":"Empty end tag with following comment",
"input":"a</><!--b-->c",
"output":[["Character", "a"], "ParseError", ["Comment", "b"], ["Character", "c"]]},
{"description":"Empty end tag with following end tag",
"input":"a</></b>c",
"output":[["Character", "a"], "ParseError", ["EndTag", "b"], ["Character", "c"]]}
]}

File diff suppressed because it is too large Load Diff

View File

@@ -1,344 +0,0 @@
{"tests": [
{"description":"< in attribute name",
"input":"<z/0 <>",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "", "<": ""}]]},
{"description":"< in attribute value",
"input":"<z x=<>",
"output":["ParseError", ["StartTag", "z", {"x": "<"}]]},
{"description":"= in unquoted attribute value",
"input":"<z z=z=z>",
"output":["ParseError", ["StartTag", "z", {"z": "z=z"}]]},
{"description":"= attribute",
"input":"<z =>",
"output":["ParseError", ["StartTag", "z", {"=": ""}]]},
{"description":"== attribute",
"input":"<z ==>",
"output":["ParseError", "ParseError", ["StartTag", "z", {"=": ""}]]},
{"description":"=== attribute",
"input":"<z ===>",
"output":["ParseError", "ParseError", ["StartTag", "z", {"=": "="}]]},
{"description":"==== attribute",
"input":"<z ====>",
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"=": "=="}]]},
{"description":"Allowed \" after ampersand in attribute value",
"input":"<z z=\"&\">",
"output":[["StartTag", "z", {"z": "&"}]]},
{"description":"Non-allowed ' after ampersand in attribute value",
"input":"<z z=\"&'\">",
"output":["ParseError", ["StartTag", "z", {"z": "&'"}]]},
{"description":"Allowed ' after ampersand in attribute value",
"input":"<z z='&'>",
"output":[["StartTag", "z", {"z": "&"}]]},
{"description":"Non-allowed \" after ampersand in attribute value",
"input":"<z z='&\"'>",
"output":["ParseError", ["StartTag", "z", {"z": "&\""}]]},
{"description":"Text after bogus character reference",
"input":"<z z='&xlink_xmlns;'>bar<z>",
"output":["ParseError",["StartTag","z",{"z":"&xlink_xmlns;"}],["Character","bar"],["StartTag","z",{}]]},
{"description":"Text after hex character reference",
"input":"<z z='&#x0020; foo'>bar<z>",
"output":[["StartTag","z",{"z":" foo"}],["Character","bar"],["StartTag","z",{}]]},
{"description":"Attribute name starting with \"",
"input":"<foo \"='bar'>",
"output":["ParseError", ["StartTag", "foo", {"\"": "bar"}]]},
{"description":"Attribute name starting with '",
"input":"<foo '='bar'>",
"output":["ParseError", ["StartTag", "foo", {"'": "bar"}]]},
{"description":"Attribute name containing \"",
"input":"<foo a\"b='bar'>",
"output":["ParseError", ["StartTag", "foo", {"a\"b": "bar"}]]},
{"description":"Attribute name containing '",
"input":"<foo a'b='bar'>",
"output":["ParseError", ["StartTag", "foo", {"a'b": "bar"}]]},
{"description":"Unquoted attribute value containing '",
"input":"<foo a=b'c>",
"output":["ParseError", ["StartTag", "foo", {"a": "b'c"}]]},
{"description":"Unquoted attribute value containing \"",
"input":"<foo a=b\"c>",
"output":["ParseError", ["StartTag", "foo", {"a": "b\"c"}]]},
{"description":"Double-quoted attribute value not followed by whitespace",
"input":"<foo a=\"b\"c>",
"output":["ParseError", ["StartTag", "foo", {"a": "b", "c": ""}]]},
{"description":"Single-quoted attribute value not followed by whitespace",
"input":"<foo a='b'c>",
"output":["ParseError", ["StartTag", "foo", {"a": "b", "c": ""}]]},
{"description":"Quoted attribute followed by permitted /",
"input":"<br a='b'/>",
"output":[["StartTag","br",{"a":"b"},true]]},
{"description":"Quoted attribute followed by non-permitted /",
"input":"<bar a='b'/>",
"output":[["StartTag","bar",{"a":"b"},true]]},
{"description":"CR EOF after doctype name",
"input":"<!doctype html \r",
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
{"description":"CR EOF in tag name",
"input":"<z\r",
"output":["ParseError"]},
{"description":"Slash EOF in tag name",
"input":"<z/",
"output":["ParseError"]},
{"description":"Zero hex numeric entity",
"input":"&#x0",
"output":["ParseError", "ParseError", ["Character", "\uFFFD"]]},
{"description":"Zero decimal numeric entity",
"input":"&#0",
"output":["ParseError", "ParseError", ["Character", "\uFFFD"]]},
{"description":"Zero-prefixed hex numeric entity",
"input":"&#x000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000041;",
"output":[["Character", "A"]]},
{"description":"Zero-prefixed decimal numeric entity",
"input":"&#000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000065;",
"output":[["Character", "A"]]},
{"description":"Empty hex numeric entities",
"input":"&#x &#X ",
"output":["ParseError", ["Character", "&#x "], "ParseError", ["Character", "&#X "]]},
{"description":"Empty decimal numeric entities",
"input":"&# &#; ",
"output":["ParseError", ["Character", "&# "], "ParseError", ["Character", "&#; "]]},
{"description":"Non-BMP numeric entity",
"input":"&#x10000;",
"output":[["Character", "\uD800\uDC00"]]},
{"description":"Maximum non-BMP numeric entity",
"input":"&#X10FFFF;",
"output":["ParseError", ["Character", "\uDBFF\uDFFF"]]},
{"description":"Above maximum numeric entity",
"input":"&#x110000;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"32-bit hex numeric entity",
"input":"&#x80000041;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"33-bit hex numeric entity",
"input":"&#x100000041;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"33-bit decimal numeric entity",
"input":"&#4294967361;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"65-bit hex numeric entity",
"input":"&#x10000000000000041;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"65-bit decimal numeric entity",
"input":"&#18446744073709551681;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"Surrogate code point edge cases",
"input":"&#xD7FF;&#xD800;&#xD801;&#xDFFE;&#xDFFF;&#xE000;",
"output":[["Character", "\uD7FF"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD\uE000"]]},
{"description":"Uppercase start tag name",
"input":"<X>",
"output":[["StartTag", "x", {}]]},
{"description":"Uppercase end tag name",
"input":"</X>",
"output":[["EndTag", "x"]]},
{"description":"Uppercase attribute name",
"input":"<x X>",
"output":[["StartTag", "x", { "x":"" }]]},
{"description":"Tag/attribute name case edge values",
"input":"<x@AZ[`az{ @AZ[`az{>",
"output":[["StartTag", "x@az[`az{", { "@az[`az{":"" }]]},
{"description":"Duplicate different-case attributes",
"input":"<x x=1 x=2 X=3>",
"output":["ParseError", "ParseError", ["StartTag", "x", { "x":"1" }]]},
{"description":"Uppercase close tag attributes",
"input":"</x X>",
"output":["ParseError", ["EndTag", "x"]]},
{"description":"Duplicate close tag attributes",
"input":"</x x x>",
"output":["ParseError", "ParseError", ["EndTag", "x"]]},
{"description":"Permitted slash",
"input":"<br/>",
"output":[["StartTag","br",{},true]]},
{"description":"Non-permitted slash",
"input":"<xr/>",
"output":[["StartTag","xr",{},true]]},
{"description":"Permitted slash but in close tag",
"input":"</br/>",
"output":["ParseError", ["EndTag", "br"]]},
{"description":"Doctype public case-sensitivity (1)",
"input":"<!DoCtYpE HtMl PuBlIc \"AbC\" \"XyZ\">",
"output":[["DOCTYPE", "html", "AbC", "XyZ", true]]},
{"description":"Doctype public case-sensitivity (2)",
"input":"<!dOcTyPe hTmL pUbLiC \"aBc\" \"xYz\">",
"output":[["DOCTYPE", "html", "aBc", "xYz", true]]},
{"description":"Doctype system case-sensitivity (1)",
"input":"<!DoCtYpE HtMl SyStEm \"XyZ\">",
"output":[["DOCTYPE", "html", null, "XyZ", true]]},
{"description":"Doctype system case-sensitivity (2)",
"input":"<!dOcTyPe hTmL sYsTeM \"xYz\">",
"output":[["DOCTYPE", "html", null, "xYz", true]]},
{"description":"U+0000 in lookahead region after non-matching character",
"input":"<!doc>\u0000",
"output":["ParseError", ["Comment", "doc"], "ParseError", ["Character", "\u0000"]],
"ignoreErrorOrder":true},
{"description":"U+0000 in lookahead region",
"input":"<!doc\u0000",
"output":["ParseError", ["Comment", "doc\uFFFD"]],
"ignoreErrorOrder":true},
{"description":"U+0080 in lookahead region",
"input":"<!doc\u0080",
"output":["ParseError", "ParseError", ["Comment", "doc\u0080"]],
"ignoreErrorOrder":true},
{"description":"U+FDD1 in lookahead region",
"input":"<!doc\uFDD1",
"output":["ParseError", "ParseError", ["Comment", "doc\uFDD1"]],
"ignoreErrorOrder":true},
{"description":"U+1FFFF in lookahead region",
"input":"<!doc\uD83F\uDFFF",
"output":["ParseError", "ParseError", ["Comment", "doc\uD83F\uDFFF"]],
"ignoreErrorOrder":true},
{"description":"CR followed by non-LF",
"input":"\r?",
"output":[["Character", "\n?"]]},
{"description":"CR at EOF",
"input":"\r",
"output":[["Character", "\n"]]},
{"description":"LF at EOF",
"input":"\n",
"output":[["Character", "\n"]]},
{"description":"CR LF",
"input":"\r\n",
"output":[["Character", "\n"]]},
{"description":"CR CR",
"input":"\r\r",
"output":[["Character", "\n\n"]]},
{"description":"LF LF",
"input":"\n\n",
"output":[["Character", "\n\n"]]},
{"description":"LF CR",
"input":"\n\r",
"output":[["Character", "\n\n"]]},
{"description":"text CR CR CR text",
"input":"text\r\r\rtext",
"output":[["Character", "text\n\n\ntext"]]},
{"description":"Doctype publik",
"input":"<!DOCTYPE html PUBLIK \"AbC\" \"XyZ\">",
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
{"description":"Doctype publi",
"input":"<!DOCTYPE html PUBLI",
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
{"description":"Doctype sistem",
"input":"<!DOCTYPE html SISTEM \"AbC\">",
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
{"description":"Doctype sys",
"input":"<!DOCTYPE html SYS",
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
{"description":"Doctype html x>text",
"input":"<!DOCTYPE html x>text",
"output":["ParseError", ["DOCTYPE", "html", null, null, false], ["Character", "text"]]},
{"description":"Grave accent in unquoted attribute",
"input":"<a a=aa`>",
"output":["ParseError", ["StartTag", "a", {"a":"aa`"}]]},
{"description":"EOF in tag name state ",
"input":"<a",
"output":["ParseError"]},
{"description":"EOF in tag name state",
"input":"<a",
"output":["ParseError"]},
{"description":"EOF in before attribute name state",
"input":"<a ",
"output":["ParseError"]},
{"description":"EOF in attribute name state",
"input":"<a a",
"output":["ParseError"]},
{"description":"EOF in after attribute name state",
"input":"<a a ",
"output":["ParseError"]},
{"description":"EOF in before attribute value state",
"input":"<a a =",
"output":["ParseError"]},
{"description":"EOF in attribute value (double quoted) state",
"input":"<a a =\"a",
"output":["ParseError"]},
{"description":"EOF in attribute value (single quoted) state",
"input":"<a a ='a",
"output":["ParseError"]},
{"description":"EOF in attribute value (unquoted) state",
"input":"<a a =a",
"output":["ParseError"]},
{"description":"EOF in after attribute value state",
"input":"<a a ='a'",
"output":["ParseError"]}
]}

File diff suppressed because it is too large Load Diff

View File

@@ -1,27 +0,0 @@
{"tests" : [
{"description": "Invalid Unicode character U+DFFF",
"doubleEscaped":true,
"input": "\\uDFFF",
"output":["ParseError", ["Character", "\\uFFFD"]]},
{"description": "Invalid Unicode character U+D800",
"doubleEscaped":true,
"input": "\\uD800",
"output":["ParseError", ["Character", "\\uFFFD"]]},
{"description": "Invalid Unicode character U+DFFF with valid preceding character",
"doubleEscaped":true,
"input": "a\\uDFFF",
"output":["ParseError", ["Character", "a\\uFFFD"]]},
{"description": "Invalid Unicode character U+D800 with valid following character",
"doubleEscaped":true,
"input": "\\uD800a",
"output":["ParseError", ["Character", "\\uFFFDa"]]},
{"description":"CR followed by U+0000",
"input":"\r\u0000",
"output":[["Character", "\n"], "ParseError", ["Character", "\u0000"]],
"ignoreErrorOrder":true}
]
}

View File

@@ -1,22 +0,0 @@
{"xmlViolationTests": [
{"description":"Non-XML character",
"input":"a\uFFFFb",
"ignoreErrorOrder":true,
"output":["ParseError",["Character","a\uFFFDb"]]},
{"description":"Non-XML space",
"input":"a\u000Cb",
"ignoreErrorOrder":true,
"output":[["Character","a b"]]},
{"description":"Double hyphen in comment",
"input":"<!-- foo -- bar -->",
"output":["ParseError",["Comment"," foo - - bar "]]},
{"description":"FF between attributes",
"input":"<a b=''\u000Cc=''>",
"output":[["StartTag","a",{"b":"","c":""}]]}
]}

View File

@@ -1,194 +0,0 @@
#data
<a><p></a></p>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <p>
| <a>
#data
<a>1<p>2</a>3</p>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <p>
| <a>
| "2"
| "3"
#data
<a>1<button>2</a>3</button>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <button>
| <a>
| "2"
| "3"
#data
<a>1<b>2</a>3</b>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <b>
| "2"
| <b>
| "3"
#data
<a>1<div>2<div>3</a>4</div>5</div>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <div>
| <a>
| "2"
| <div>
| <a>
| "3"
| "4"
| "5"
#data
<table><a>1<p>2</a>3</p>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <p>
| <a>
| "2"
| "3"
| <table>
#data
<b><b><a><p></a>
#errors
#document
| <html>
| <head>
| <body>
| <b>
| <b>
| <a>
| <p>
| <a>
#data
<b><a><b><p></a>
#errors
#document
| <html>
| <head>
| <body>
| <b>
| <a>
| <b>
| <b>
| <p>
| <a>
#data
<a><b><b><p></a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <b>
| <b>
| <b>
| <b>
| <p>
| <a>
#data
<p>1<s id="A">2<b id="B">3</p>4</s>5</b>
#errors
#document
| <html>
| <head>
| <body>
| <p>
| "1"
| <s>
| id="A"
| "2"
| <b>
| id="B"
| "3"
| <s>
| id="A"
| <b>
| id="B"
| "4"
| <b>
| id="B"
| "5"
#data
<table><a>1<td>2</td>3</table>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <a>
| "3"
| <table>
| <tbody>
| <tr>
| <td>
| "2"
#data
<table>A<td>B</td>C</table>
#errors
#document
| <html>
| <head>
| <body>
| "AC"
| <table>
| <tbody>
| <tr>
| <td>
| "B"
#data
<a><svg><tr><input></a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <svg svg>
| <svg tr>
| <svg input>

View File

@@ -1,31 +0,0 @@
#data
<b>1<i>2<p>3</b>4
#errors
#document
| <html>
| <head>
| <body>
| <b>
| "1"
| <i>
| "2"
| <i>
| <p>
| <b>
| "3"
| "4"
#data
<a><div><style></style><address><a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <div>
| <a>
| <style>
| <address>
| <a>
| <a>

View File

@@ -1,135 +0,0 @@
#data
FOO<!-- BAR -->BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -->
| "BAZ"
#data
FOO<!-- BAR --!>BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -->
| "BAZ"
#data
FOO<!-- BAR -- >BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -- >BAZ -->
#data
FOO<!-- BAR -- <QUX> -- MUX -->BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -- <QUX> -- MUX -->
| "BAZ"
#data
FOO<!-- BAR -- <QUX> -- MUX --!>BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -- <QUX> -- MUX -->
| "BAZ"
#data
FOO<!-- BAR -- <QUX> -- MUX -- >BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -- <QUX> -- MUX -- >BAZ -->
#data
FOO<!---->BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- -->
| "BAZ"
#data
FOO<!--->BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- -->
| "BAZ"
#data
FOO<!-->BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- -->
| "BAZ"
#data
<?xml version="1.0">Hi
#errors
#document
| <!-- ?xml version="1.0" -->
| <html>
| <head>
| <body>
| "Hi"
#data
<?xml version="1.0">
#errors
#document
| <!-- ?xml version="1.0" -->
| <html>
| <head>
| <body>
#data
<?xml version
#errors
#document
| <!-- ?xml version -->
| <html>
| <head>
| <body>
#data
FOO<!----->BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- - -->
| "BAZ"

View File

@@ -1,370 +0,0 @@
#data
<!DOCTYPE html>Hello
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "Hello"
#data
<!dOctYpE HtMl>Hello
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPEhtml>Hello
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE>Hello
#errors
#document
| <!DOCTYPE >
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE >Hello
#errors
#document
| <!DOCTYPE >
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato >Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato taco>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato taco "ddd>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato sYstEM>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato sYstEM >Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato sYstEM ggg>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEM taco >Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEM 'taco"'>Hello
#errors
#document
| <!DOCTYPE potato "" "taco"">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEM "taco">Hello
#errors
#document
| <!DOCTYPE potato "" "taco">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEM "tai'co">Hello
#errors
#document
| <!DOCTYPE potato "" "tai'co">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEMtaco "ddd">Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato grass SYSTEM taco>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato pUbLIc>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato pUbLIc >Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato pUbLIcgoof>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC goof>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC "go'of">Hello
#errors
#document
| <!DOCTYPE potato "go'of" "">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC 'go'of'>Hello
#errors
#document
| <!DOCTYPE potato "go" "">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC 'go:hh of' >Hello
#errors
#document
| <!DOCTYPE potato "go:hh of" "">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC "W3C-//dfdf" SYSTEM ggg>Hello
#errors
#document
| <!DOCTYPE potato "W3C-//dfdf" "">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">Hello
#errors
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE ...>Hello
#errors
#document
| <!DOCTYPE ...>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
#errors
#document
| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
#errors
#document
| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Frameset//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE root-element [SYSTEM OR PUBLIC FPI] "uri" [
<!-- internal declarations -->
]>
#errors
#document
| <!DOCTYPE root-element>
| <html>
| <head>
| <body>
| "]>"
#data
<!DOCTYPE html PUBLIC
"-//WAPFORUM//DTD XHTML Mobile 1.0//EN"
"http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
#errors
#document
| <!DOCTYPE html "-//WAPFORUM//DTD XHTML Mobile 1.0//EN" "http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML SYSTEM "http://www.w3.org/DTD/HTML4-strict.dtd"><body><b>Mine!</b></body>
#errors
#document
| <!DOCTYPE html "" "http://www.w3.org/DTD/HTML4-strict.dtd">
| <html>
| <head>
| <body>
| <b>
| "Mine!"
#data
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN""http://www.w3.org/TR/html4/strict.dtd">
#errors
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
#errors
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML PUBLIC"-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
#errors
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML PUBLIC'-//W3C//DTD HTML 4.01//EN''http://www.w3.org/TR/html4/strict.dtd'>
#errors
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>

View File

@@ -1,603 +0,0 @@
#data
FOO&gt;BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO>BAR"
#data
FOO&gtBAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO>BAR"
#data
FOO&gt BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO> BAR"
#data
FOO&gt;;;BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO>;;BAR"
#data
I'm &notit; I tell you
#errors
#document
| <html>
| <head>
| <body>
| "I'm ¬it; I tell you"
#data
I'm &notin; I tell you
#errors
#document
| <html>
| <head>
| <body>
| "I'm ∉ I tell you"
#data
FOO& BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO& BAR"
#data
FOO&<BAR>
#errors
#document
| <html>
| <head>
| <body>
| "FOO&"
| <bar>
#data
FOO&&&&gt;BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO&&&>BAR"
#data
FOO&#41;BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO)BAR"
#data
FOO&#x41;BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOOABAR"
#data
FOO&#X41;BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOOABAR"
#data
FOO&#BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO&#BAR"
#data
FOO&#ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO&#ZOO"
#data
FOO&#xBAR
#errors
#document
| <html>
| <head>
| <body>
| "FOOºR"
#data
FOO&#xZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO&#xZOO"
#data
FOO&#XZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO&#XZOO"
#data
FOO&#41BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO)BAR"
#data
FOO&#x41BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO䆺R"
#data
FOO&#x41ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOAZOO"
#data
FOO&#x0000;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<4F>ZOO"
#data
FOO&#x0078;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOxZOO"
#data
FOO&#x0079;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOyZOO"
#data
FOO&#x0080;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO€ZOO"
#data
FOO&#x0081;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0082;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0083;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOƒZOO"
#data
FOO&#x0084;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO„ZOO"
#data
FOO&#x0085;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO…ZOO"
#data
FOO&#x0086;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO†ZOO"
#data
FOO&#x0087;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO‡ZOO"
#data
FOO&#x0088;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOˆZOO"
#data
FOO&#x0089;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO‰ZOO"
#data
FOO&#x008A;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOŠZOO"
#data
FOO&#x008B;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x008C;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOŒZOO"
#data
FOO&#x008D;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x008E;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOŽZOO"
#data
FOO&#x008F;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0090;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0091;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0092;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0093;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO“ZOO"
#data
FOO&#x0094;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO”ZOO"
#data
FOO&#x0095;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO•ZOO"
#data
FOO&#x0096;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0097;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO—ZOO"
#data
FOO&#x0098;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO˜ZOO"
#data
FOO&#x0099;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO™ZOO"
#data
FOO&#x009A;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOšZOO"
#data
FOO&#x009B;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x009C;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOœZOO"
#data
FOO&#x009D;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x009E;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOžZOO"
#data
FOO&#x009F;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOŸZOO"
#data
FOO&#x00A0;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO ZOO"
#data
FOO&#xD7FF;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO퟿ZOO"
#data
FOO&#xD800;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<4F>ZOO"
#data
FOO&#xD801;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<4F>ZOO"
#data
FOO&#xDFFE;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<4F>ZOO"
#data
FOO&#xDFFF;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<4F>ZOO"
#data
FOO&#xE000;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x10FFFE;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO􏿾ZOO"
#data
FOO&#x1087D4;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO􈟔ZOO"
#data
FOO&#x10FFFF;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO􏿿ZOO"
#data
FOO&#x110000;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<4F>ZOO"
#data
FOO&#xFFFFFF;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<4F>ZOO"

View File

@@ -1,249 +0,0 @@
#data
<div bar="ZZ&gt;YY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ>YY"
#data
<div bar="ZZ&"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&"
#data
<div bar='ZZ&'></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&"
#data
<div bar=ZZ&></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&"
#data
<div bar="ZZ&gt=YY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gt=YY"
#data
<div bar="ZZ&gt0YY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gt0YY"
#data
<div bar="ZZ&gt9YY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gt9YY"
#data
<div bar="ZZ&gtaYY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gtaYY"
#data
<div bar="ZZ&gtZYY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gtZYY"
#data
<div bar="ZZ&gt YY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ> YY"
#data
<div bar="ZZ&gt"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ>"
#data
<div bar='ZZ&gt'></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ>"
#data
<div bar=ZZ&gt></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ>"
#data
<div bar="ZZ&pound_id=23"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ£_id=23"
#data
<div bar="ZZ&prod_id=23"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&prod_id=23"
#data
<div bar="ZZ&pound;_id=23"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ£_id=23"
#data
<div bar="ZZ&prod;_id=23"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ∏_id=23"
#data
<div bar="ZZ&pound=23"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&pound=23"
#data
<div bar="ZZ&prod=23"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&prod=23"
#data
<div>ZZ&pound_id=23</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ£_id=23"
#data
<div>ZZ&prod_id=23</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ&prod_id=23"
#data
<div>ZZ&pound;_id=23</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ£_id=23"
#data
<div>ZZ&prod;_id=23</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ∏_id=23"
#data
<div>ZZ&pound=23</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ£=23"
#data
<div>ZZ&prod=23</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ&prod=23"

View File

@@ -1,246 +0,0 @@
#data
<div<div>
#errors
#document
| <html>
| <head>
| <body>
| <div<div>
#data
<div foo<bar=''>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| foo<bar=""
#data
<div foo=`bar`>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| foo="`bar`"
#data
<div \"foo=''>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| \"foo=""
#data
<a href='\nbar'></a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| href="\nbar"
#data
<!DOCTYPE html>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
#data
&lang;&rang;
#errors
#document
| <html>
| <head>
| <body>
| "⟨⟩"
#data
&apos;
#errors
#document
| <html>
| <head>
| <body>
| "'"
#data
&ImaginaryI;
#errors
#document
| <html>
| <head>
| <body>
| ""
#data
&Kopf;
#errors
#document
| <html>
| <head>
| <body>
| "𝕂"
#data
&notinva;
#errors
#document
| <html>
| <head>
| <body>
| "∉"
#data
<?import namespace="foo" implementation="#bar">
#errors
#document
| <!-- ?import namespace="foo" implementation="#bar" -->
| <html>
| <head>
| <body>
#data
<!--foo--bar-->
#errors
#document
| <!-- foo--bar -->
| <html>
| <head>
| <body>
#data
<![CDATA[x]]>
#errors
#document
| <!-- [CDATA[x]] -->
| <html>
| <head>
| <body>
#data
<textarea><!--</textarea>--></textarea>
#errors
#document
| <html>
| <head>
| <body>
| <textarea>
| "<!--"
| "-->"
#data
<textarea><!--</textarea>-->
#errors
#document
| <html>
| <head>
| <body>
| <textarea>
| "<!--"
| "-->"
#data
<style><!--</style>--></style>
#errors
#document
| <html>
| <head>
| <style>
| "<!--"
| <body>
| "-->"
#data
<style><!--</style>-->
#errors
#document
| <html>
| <head>
| <style>
| "<!--"
| <body>
| "-->"
#data
<ul><li>A </li> <li>B</li></ul>
#errors
#document
| <html>
| <head>
| <body>
| <ul>
| <li>
| "A "
| " "
| <li>
| "B"
#data
<table><form><input type=hidden><input></form><div></div></table>
#errors
#document
| <html>
| <head>
| <body>
| <input>
| <div>
| <table>
| <form>
| <input>
| type="hidden"
#data
<i>A<b>B<p></i>C</b>D
#errors
#document
| <html>
| <head>
| <body>
| <i>
| "A"
| <b>
| "B"
| <b>
| <p>
| <b>
| <i>
| "C"
| "D"
#data
<div></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
#data
<svg></svg>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
#data
<math></math>
#errors
#document
| <html>
| <head>
| <body>
| <math math>

View File

@@ -1,43 +0,0 @@
#data
<button>1</foo>
#errors
#document
| <html>
| <head>
| <body>
| <button>
| "1"
#data
<foo>1<p>2</foo>
#errors
#document
| <html>
| <head>
| <body>
| <foo>
| "1"
| <p>
| "2"
#data
<dd>1</foo>
#errors
#document
| <html>
| <head>
| <body>
| <dd>
| "1"
#data
<foo>1<dd>2</foo>
#errors
#document
| <html>
| <head>
| <body>
| <foo>
| "1"
| <dd>
| "2"

View File

@@ -1,40 +0,0 @@
#data
<isindex>
#errors
#document
| <html>
| <head>
| <body>
| <form>
| <hr>
| <label>
| "This is a searchable index. Enter search keywords: "
| <input>
| name="isindex"
| <hr>
#data
<isindex name="A" action="B" prompt="C" foo="D">
#errors
#document
| <html>
| <head>
| <body>
| <form>
| action="B"
| <hr>
| <label>
| "C"
| <input>
| foo="D"
| name="isindex"
| <hr>
#data
<form><isindex>
#errors
#document
| <html>
| <head>
| <body>
| <form>

View File

@@ -1,52 +0,0 @@
#data
<input type="hidden"><frameset>
#errors
21: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
31: “frameset” start tag seen.
31: End of file seen and there were open elements.
#document
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE html><table><caption><svg>foo</table>bar
#errors
47: End tag “table” did not match the name of the current open element (“svg”).
47: “table” closed but “caption” was still open.
47: End tag “table” seen, but there were open elements.
36: Unclosed element “svg”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <svg svg>
| "foo"
| "bar"
#data
<table><tr><td><svg><desc><td></desc><circle>
#errors
7: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
30: A table cell was implicitly closed, but there were open elements.
26: Unclosed element “desc”.
20: Unclosed element “svg”.
37: Stray end tag “desc”.
45: End of file seen and there were open elements.
45: Unclosed element “circle”.
7: Unclosed element “table”.
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| <svg desc>
| <td>
| <circle>

View File

@@ -1,308 +0,0 @@
#data
FOO<script>'Hello'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'Hello'"
| "BAR"
#data
FOO<script></script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script></script >BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script></script/>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script></script/ >BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script type="text/plain"></scriptx>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "</scriptx>BAR"
#data
FOO<script></script foo=">" dd>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script>'<'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<'"
| "BAR"
#data
FOO<script>'<!'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!'"
| "BAR"
#data
FOO<script>'<!-'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-'"
| "BAR"
#data
FOO<script>'<!--'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!--'"
| "BAR"
#data
FOO<script>'<!---'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!---'"
| "BAR"
#data
FOO<script>'<!-->'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-->'"
| "BAR"
#data
FOO<script>'<!-->'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-->'"
| "BAR"
#data
FOO<script>'<!-- potato'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-- potato'"
| "BAR"
#data
FOO<script>'<!-- <sCrIpt'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-- <sCrIpt'"
| "BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt>'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt>'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt> -'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt> -'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt> --'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt> --'</script>BAR"
#data
FOO<script>'<!-- <sCrIpt> -->'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-- <sCrIpt> -->'"
| "BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt> --!>'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt> --!>'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt> -- >'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt> -- >'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt '</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt '</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt/'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt/'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt\'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt\'"
| "BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt/'</script>BAR</script>QUX
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt/'</script>BAR"
| "QUX"

View File

@@ -1,212 +0,0 @@
#data
<table><th>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <th>
#data
<table><td>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
#data
<table><col foo='bar'>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <colgroup>
| <col>
| foo="bar"
#data
<table><colgroup></html>foo
#errors
#document
| <html>
| <head>
| <body>
| "foo"
| <table>
| <colgroup>
#data
<table></table><p>foo
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <p>
| "foo"
#data
<table></body></caption></col></colgroup></html></tbody></td></tfoot></th></thead></tr><td>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
#data
<table><select><option>3</select></table>
#errors
#document
| <html>
| <head>
| <body>
| <select>
| <option>
| "3"
| <table>
#data
<table><select><table></table></select></table>
#errors
#document
| <html>
| <head>
| <body>
| <select>
| <table>
| <table>
#data
<table><select></table>
#errors
#document
| <html>
| <head>
| <body>
| <select>
| <table>
#data
<table><select><option>A<tr><td>B</td></tr></table>
#errors
#document
| <html>
| <head>
| <body>
| <select>
| <option>
| "A"
| <table>
| <tbody>
| <tr>
| <td>
| "B"
#data
<table><td></body></caption></col></colgroup></html>foo
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| "foo"
#data
<table><td>A</table>B
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| "A"
| "B"
#data
<table><tr><caption>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <caption>
#data
<table><tr></body></caption></col></colgroup></html></td></th><td>foo
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| "foo"
#data
<table><td><tr>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <tr>
#data
<table><td><button><td>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <button>
| <td>
#data
<table><tr><td><svg><desc><td>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| <svg desc>
| <td>

File diff suppressed because it is too large Load Diff

View File

@@ -1,799 +0,0 @@
#data
<!DOCTYPE html><svg></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
#data
<!DOCTYPE html><svg></svg><![CDATA[a]]>
#errors
29: Bogus comment
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <!-- [CDATA[a]] -->
#data
<!DOCTYPE html><body><svg></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
#data
<!DOCTYPE html><body><select><svg></svg></select>
#errors
35: Stray “svg” start tag.
42: Stray end tag “svg”
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
#data
<!DOCTYPE html><body><select><option><svg></svg></option></select>
#errors
43: Stray “svg” start tag.
50: Stray end tag “svg”
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <option>
#data
<!DOCTYPE html><body><table><svg></svg></table>
#errors
34: Start tag “svg” seen in “table”.
41: Stray end tag “svg”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <table>
#data
<!DOCTYPE html><body><table><svg><g>foo</g></svg></table>
#errors
34: Start tag “svg” seen in “table”.
46: Stray end tag “g”.
53: Stray end tag “svg”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <table>
#data
<!DOCTYPE html><body><table><svg><g>foo</g><g>bar</g></svg></table>
#errors
34: Start tag “svg” seen in “table”.
46: Stray end tag “g”.
58: Stray end tag “g”.
65: Stray end tag “svg”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <table>
#data
<!DOCTYPE html><body><table><tbody><svg><g>foo</g><g>bar</g></svg></tbody></table>
#errors
41: Start tag “svg” seen in “table”.
53: Stray end tag “g”.
65: Stray end tag “g”.
72: Stray end tag “svg”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <table>
| <tbody>
#data
<!DOCTYPE html><body><table><tbody><tr><svg><g>foo</g><g>bar</g></svg></tr></tbody></table>
#errors
45: Start tag “svg” seen in “table”.
57: Stray end tag “g”.
69: Stray end tag “g”.
76: Stray end tag “svg”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <table>
| <tbody>
| <tr>
#data
<!DOCTYPE html><body><table><tbody><tr><td><svg><g>foo</g><g>bar</g></svg></td></tr></tbody></table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
#data
<!DOCTYPE html><body><table><tbody><tr><td><svg><g>foo</g><g>bar</g></svg><p>baz</td></tr></tbody></table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g></svg><p>baz</caption></table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
#errors
70: HTML start tag “p” in a foreign namespace context.
81: “table” closed but “caption” was still open.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g>baz</table><p>quux
#errors
78: “table” closed but “caption” was still open.
78: Unclosed elements on stack.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| "baz"
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><colgroup><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
#errors
44: Start tag “svg” seen in “table”.
56: Stray end tag “g”.
68: Stray end tag “g”.
71: HTML start tag “p” in a foreign namespace context.
71: Start tag “p” seen in “table”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
| <table>
| <colgroup>
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><tr><td><select><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
#errors
50: Stray “svg” start tag.
54: Stray “g” start tag.
62: Stray end tag “g”
66: Stray “g” start tag.
74: Stray end tag “g”
77: Stray “p” start tag.
88: “table” end tag with “select” open.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <select>
| "foobarbaz"
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><select><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
#errors
36: Start tag “select” seen in “table”.
42: Stray “svg” start tag.
46: Stray “g” start tag.
54: Stray end tag “g”
58: Stray “g” start tag.
66: Stray end tag “g”
69: Stray “p” start tag.
80: “table” end tag with “select” open.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| "foobarbaz"
| <table>
| <p>
| "quux"
#data
<!DOCTYPE html><body></body></html><svg><g>foo</g><g>bar</g><p>baz
#errors
41: Stray “svg” start tag.
68: HTML start tag “p” in a foreign namespace context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><body></body><svg><g>foo</g><g>bar</g><p>baz
#errors
34: Stray “svg” start tag.
61: HTML start tag “p” in a foreign namespace context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><frameset><svg><g></g><g></g><p><span>
#errors
31: Stray “svg” start tag.
35: Stray “g” start tag.
40: Stray end tag “g”
44: Stray “g” start tag.
49: Stray end tag “g”
52: Stray “p” start tag.
58: Stray “span” start tag.
58: End of file seen and there were open elements.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE html><frameset></frameset><svg><g></g><g></g><p><span>
#errors
42: Stray “svg” start tag.
46: Stray “g” start tag.
51: Stray end tag “g”
55: Stray “g” start tag.
60: Stray end tag “g”
63: Stray “p” start tag.
69: Stray “span” start tag.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE html><body xlink:href=foo><svg xlink:href=foo></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| <svg svg>
| xlink href="foo"
#data
<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo></g></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| xml:lang="en"
| <svg svg>
| <svg g>
| xlink href="foo"
| xml lang="en"
#data
<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo /></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| xml:lang="en"
| <svg svg>
| <svg g>
| xlink href="foo"
| xml lang="en"
#data
<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo />bar</svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| xml:lang="en"
| <svg svg>
| <svg g>
| xlink href="foo"
| xml lang="en"
| "bar"
#data
<svg></path>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
#data
<div><svg></div>a
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| "a"
#data
<div><svg><path></div>a
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| "a"
#data
<div><svg><path></svg><path>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| <path>
#data
<div><svg><path><foreignObject><math></div>a
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| <svg foreignObject>
| <math math>
| "a"
#data
<div><svg><path><foreignObject><p></div>a
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| <svg foreignObject>
| <p>
| "a"
#data
<!DOCTYPE html><svg><desc><div><svg><ul>a
#errors
40: HTML start tag “ul” in a foreign namespace context.
41: End of file in a foreign namespace context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg desc>
| <div>
| <svg svg>
| <ul>
| "a"
#data
<!DOCTYPE html><svg><desc><svg><ul>a
#errors
35: HTML start tag “ul” in a foreign namespace context.
36: End of file in a foreign namespace context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg desc>
| <svg svg>
| <ul>
| "a"
#data
<!DOCTYPE html><p><svg><desc><p>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <svg svg>
| <svg desc>
| <p>
#data
<!DOCTYPE html><p><svg><title><p>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <svg svg>
| <svg title>
| <p>
#data
<div><svg><path><foreignObject><p></foreignObject><p>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| <svg foreignObject>
| <p>
| <p>
#data
<math><mi><div><object><div><span></span></div></object></div></mi><mi>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| <div>
| <object>
| <div>
| <span>
| <math mi>
#data
<math><mi><svg><foreignObject><div><div></div></div></foreignObject></svg></mi><mi>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| <svg svg>
| <svg foreignObject>
| <div>
| <div>
| <math mi>
#data
<svg><script></script><path>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| <svg script>
| <svg path>
#data
<table><svg></svg><tr>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| <table>
| <tbody>
| <tr>
#data
<math><mi><mglyph>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| <math mglyph>
#data
<math><mi><malignmark>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| <math malignmark>
#data
<math><mo><mglyph>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mo>
| <math mglyph>
#data
<math><mo><malignmark>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mo>
| <math malignmark>
#data
<math><mn><mglyph>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mn>
| <math mglyph>
#data
<math><mn><malignmark>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mn>
| <math malignmark>
#data
<math><ms><mglyph>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math ms>
| <math mglyph>
#data
<math><ms><malignmark>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math ms>
| <math malignmark>
#data
<math><mtext><mglyph>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mtext>
| <math mglyph>
#data
<math><mtext><malignmark>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mtext>
| <math malignmark>
#data
<math><annotation-xml><svg></svg></annotation-xml><mi>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| <svg svg>
| <math mi>
#data
<math><annotation-xml><svg><foreignObject><div><math><mi></mi></math><span></span></div></foreignObject><path></path></svg></annotation-xml><mi>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| <svg svg>
| <svg foreignObject>
| <div>
| <math math>
| <math mi>
| <span>
| <svg path>
| <math mi>
#data
<math><annotation-xml><svg><foreignObject><math><mi><svg></svg></mi><mo></mo></math><span></span></foreignObject><path></path></svg></annotation-xml><mi>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| <svg svg>
| <svg foreignObject>
| <math math>
| <math mi>
| <svg svg>
| <math mo>
| <span>
| <svg path>
| <math mi>

View File

@@ -1,482 +0,0 @@
#data
<!DOCTYPE html><body><svg attributeName='' attributeType='' baseFrequency='' baseProfile='' calcMode='' clipPathUnits='' contentScriptType='' contentStyleType='' diffuseConstant='' edgeMode='' externalResourcesRequired='' filterRes='' filterUnits='' glyphRef='' gradientTransform='' gradientUnits='' kernelMatrix='' kernelUnitLength='' keyPoints='' keySplines='' keyTimes='' lengthAdjust='' limitingConeAngle='' markerHeight='' markerUnits='' markerWidth='' maskContentUnits='' maskUnits='' numOctaves='' pathLength='' patternContentUnits='' patternTransform='' patternUnits='' pointsAtX='' pointsAtY='' pointsAtZ='' preserveAlpha='' preserveAspectRatio='' primitiveUnits='' refX='' refY='' repeatCount='' repeatDur='' requiredExtensions='' requiredFeatures='' specularConstant='' specularExponent='' spreadMethod='' startOffset='' stdDeviation='' stitchTiles='' surfaceScale='' systemLanguage='' tableValues='' targetX='' targetY='' textLength='' viewBox='' viewTarget='' xChannelSelector='' yChannelSelector='' zoomAndPan=''></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| attributeName=""
| attributeType=""
| baseFrequency=""
| baseProfile=""
| calcMode=""
| clipPathUnits=""
| contentScriptType=""
| contentStyleType=""
| diffuseConstant=""
| edgeMode=""
| externalResourcesRequired=""
| filterRes=""
| filterUnits=""
| glyphRef=""
| gradientTransform=""
| gradientUnits=""
| kernelMatrix=""
| kernelUnitLength=""
| keyPoints=""
| keySplines=""
| keyTimes=""
| lengthAdjust=""
| limitingConeAngle=""
| markerHeight=""
| markerUnits=""
| markerWidth=""
| maskContentUnits=""
| maskUnits=""
| numOctaves=""
| pathLength=""
| patternContentUnits=""
| patternTransform=""
| patternUnits=""
| pointsAtX=""
| pointsAtY=""
| pointsAtZ=""
| preserveAlpha=""
| preserveAspectRatio=""
| primitiveUnits=""
| refX=""
| refY=""
| repeatCount=""
| repeatDur=""
| requiredExtensions=""
| requiredFeatures=""
| specularConstant=""
| specularExponent=""
| spreadMethod=""
| startOffset=""
| stdDeviation=""
| stitchTiles=""
| surfaceScale=""
| systemLanguage=""
| tableValues=""
| targetX=""
| targetY=""
| textLength=""
| viewBox=""
| viewTarget=""
| xChannelSelector=""
| yChannelSelector=""
| zoomAndPan=""
#data
<!DOCTYPE html><BODY><SVG ATTRIBUTENAME='' ATTRIBUTETYPE='' BASEFREQUENCY='' BASEPROFILE='' CALCMODE='' CLIPPATHUNITS='' CONTENTSCRIPTTYPE='' CONTENTSTYLETYPE='' DIFFUSECONSTANT='' EDGEMODE='' EXTERNALRESOURCESREQUIRED='' FILTERRES='' FILTERUNITS='' GLYPHREF='' GRADIENTTRANSFORM='' GRADIENTUNITS='' KERNELMATRIX='' KERNELUNITLENGTH='' KEYPOINTS='' KEYSPLINES='' KEYTIMES='' LENGTHADJUST='' LIMITINGCONEANGLE='' MARKERHEIGHT='' MARKERUNITS='' MARKERWIDTH='' MASKCONTENTUNITS='' MASKUNITS='' NUMOCTAVES='' PATHLENGTH='' PATTERNCONTENTUNITS='' PATTERNTRANSFORM='' PATTERNUNITS='' POINTSATX='' POINTSATY='' POINTSATZ='' PRESERVEALPHA='' PRESERVEASPECTRATIO='' PRIMITIVEUNITS='' REFX='' REFY='' REPEATCOUNT='' REPEATDUR='' REQUIREDEXTENSIONS='' REQUIREDFEATURES='' SPECULARCONSTANT='' SPECULAREXPONENT='' SPREADMETHOD='' STARTOFFSET='' STDDEVIATION='' STITCHTILES='' SURFACESCALE='' SYSTEMLANGUAGE='' TABLEVALUES='' TARGETX='' TARGETY='' TEXTLENGTH='' VIEWBOX='' VIEWTARGET='' XCHANNELSELECTOR='' YCHANNELSELECTOR='' ZOOMANDPAN=''></SVG>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| attributeName=""
| attributeType=""
| baseFrequency=""
| baseProfile=""
| calcMode=""
| clipPathUnits=""
| contentScriptType=""
| contentStyleType=""
| diffuseConstant=""
| edgeMode=""
| externalResourcesRequired=""
| filterRes=""
| filterUnits=""
| glyphRef=""
| gradientTransform=""
| gradientUnits=""
| kernelMatrix=""
| kernelUnitLength=""
| keyPoints=""
| keySplines=""
| keyTimes=""
| lengthAdjust=""
| limitingConeAngle=""
| markerHeight=""
| markerUnits=""
| markerWidth=""
| maskContentUnits=""
| maskUnits=""
| numOctaves=""
| pathLength=""
| patternContentUnits=""
| patternTransform=""
| patternUnits=""
| pointsAtX=""
| pointsAtY=""
| pointsAtZ=""
| preserveAlpha=""
| preserveAspectRatio=""
| primitiveUnits=""
| refX=""
| refY=""
| repeatCount=""
| repeatDur=""
| requiredExtensions=""
| requiredFeatures=""
| specularConstant=""
| specularExponent=""
| spreadMethod=""
| startOffset=""
| stdDeviation=""
| stitchTiles=""
| surfaceScale=""
| systemLanguage=""
| tableValues=""
| targetX=""
| targetY=""
| textLength=""
| viewBox=""
| viewTarget=""
| xChannelSelector=""
| yChannelSelector=""
| zoomAndPan=""
#data
<!DOCTYPE html><body><svg attributename='' attributetype='' basefrequency='' baseprofile='' calcmode='' clippathunits='' contentscripttype='' contentstyletype='' diffuseconstant='' edgemode='' externalresourcesrequired='' filterres='' filterunits='' glyphref='' gradienttransform='' gradientunits='' kernelmatrix='' kernelunitlength='' keypoints='' keysplines='' keytimes='' lengthadjust='' limitingconeangle='' markerheight='' markerunits='' markerwidth='' maskcontentunits='' maskunits='' numoctaves='' pathlength='' patterncontentunits='' patterntransform='' patternunits='' pointsatx='' pointsaty='' pointsatz='' preservealpha='' preserveaspectratio='' primitiveunits='' refx='' refy='' repeatcount='' repeatdur='' requiredextensions='' requiredfeatures='' specularconstant='' specularexponent='' spreadmethod='' startoffset='' stddeviation='' stitchtiles='' surfacescale='' systemlanguage='' tablevalues='' targetx='' targety='' textlength='' viewbox='' viewtarget='' xchannelselector='' ychannelselector='' zoomandpan=''></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| attributeName=""
| attributeType=""
| baseFrequency=""
| baseProfile=""
| calcMode=""
| clipPathUnits=""
| contentScriptType=""
| contentStyleType=""
| diffuseConstant=""
| edgeMode=""
| externalResourcesRequired=""
| filterRes=""
| filterUnits=""
| glyphRef=""
| gradientTransform=""
| gradientUnits=""
| kernelMatrix=""
| kernelUnitLength=""
| keyPoints=""
| keySplines=""
| keyTimes=""
| lengthAdjust=""
| limitingConeAngle=""
| markerHeight=""
| markerUnits=""
| markerWidth=""
| maskContentUnits=""
| maskUnits=""
| numOctaves=""
| pathLength=""
| patternContentUnits=""
| patternTransform=""
| patternUnits=""
| pointsAtX=""
| pointsAtY=""
| pointsAtZ=""
| preserveAlpha=""
| preserveAspectRatio=""
| primitiveUnits=""
| refX=""
| refY=""
| repeatCount=""
| repeatDur=""
| requiredExtensions=""
| requiredFeatures=""
| specularConstant=""
| specularExponent=""
| spreadMethod=""
| startOffset=""
| stdDeviation=""
| stitchTiles=""
| surfaceScale=""
| systemLanguage=""
| tableValues=""
| targetX=""
| targetY=""
| textLength=""
| viewBox=""
| viewTarget=""
| xChannelSelector=""
| yChannelSelector=""
| zoomAndPan=""
#data
<!DOCTYPE html><body><math attributeName='' attributeType='' baseFrequency='' baseProfile='' calcMode='' clipPathUnits='' contentScriptType='' contentStyleType='' diffuseConstant='' edgeMode='' externalResourcesRequired='' filterRes='' filterUnits='' glyphRef='' gradientTransform='' gradientUnits='' kernelMatrix='' kernelUnitLength='' keyPoints='' keySplines='' keyTimes='' lengthAdjust='' limitingConeAngle='' markerHeight='' markerUnits='' markerWidth='' maskContentUnits='' maskUnits='' numOctaves='' pathLength='' patternContentUnits='' patternTransform='' patternUnits='' pointsAtX='' pointsAtY='' pointsAtZ='' preserveAlpha='' preserveAspectRatio='' primitiveUnits='' refX='' refY='' repeatCount='' repeatDur='' requiredExtensions='' requiredFeatures='' specularConstant='' specularExponent='' spreadMethod='' startOffset='' stdDeviation='' stitchTiles='' surfaceScale='' systemLanguage='' tableValues='' targetX='' targetY='' textLength='' viewBox='' viewTarget='' xChannelSelector='' yChannelSelector='' zoomAndPan=''></math>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| attributename=""
| attributetype=""
| basefrequency=""
| baseprofile=""
| calcmode=""
| clippathunits=""
| contentscripttype=""
| contentstyletype=""
| diffuseconstant=""
| edgemode=""
| externalresourcesrequired=""
| filterres=""
| filterunits=""
| glyphref=""
| gradienttransform=""
| gradientunits=""
| kernelmatrix=""
| kernelunitlength=""
| keypoints=""
| keysplines=""
| keytimes=""
| lengthadjust=""
| limitingconeangle=""
| markerheight=""
| markerunits=""
| markerwidth=""
| maskcontentunits=""
| maskunits=""
| numoctaves=""
| pathlength=""
| patterncontentunits=""
| patterntransform=""
| patternunits=""
| pointsatx=""
| pointsaty=""
| pointsatz=""
| preservealpha=""
| preserveaspectratio=""
| primitiveunits=""
| refx=""
| refy=""
| repeatcount=""
| repeatdur=""
| requiredextensions=""
| requiredfeatures=""
| specularconstant=""
| specularexponent=""
| spreadmethod=""
| startoffset=""
| stddeviation=""
| stitchtiles=""
| surfacescale=""
| systemlanguage=""
| tablevalues=""
| targetx=""
| targety=""
| textlength=""
| viewbox=""
| viewtarget=""
| xchannelselector=""
| ychannelselector=""
| zoomandpan=""
#data
<!DOCTYPE html><body><svg><altGlyph /><altGlyphDef /><altGlyphItem /><animateColor /><animateMotion /><animateTransform /><clipPath /><feBlend /><feColorMatrix /><feComponentTransfer /><feComposite /><feConvolveMatrix /><feDiffuseLighting /><feDisplacementMap /><feDistantLight /><feFlood /><feFuncA /><feFuncB /><feFuncG /><feFuncR /><feGaussianBlur /><feImage /><feMerge /><feMergeNode /><feMorphology /><feOffset /><fePointLight /><feSpecularLighting /><feSpotLight /><feTile /><feTurbulence /><foreignObject /><glyphRef /><linearGradient /><radialGradient /><textPath /></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg altGlyph>
| <svg altGlyphDef>
| <svg altGlyphItem>
| <svg animateColor>
| <svg animateMotion>
| <svg animateTransform>
| <svg clipPath>
| <svg feBlend>
| <svg feColorMatrix>
| <svg feComponentTransfer>
| <svg feComposite>
| <svg feConvolveMatrix>
| <svg feDiffuseLighting>
| <svg feDisplacementMap>
| <svg feDistantLight>
| <svg feFlood>
| <svg feFuncA>
| <svg feFuncB>
| <svg feFuncG>
| <svg feFuncR>
| <svg feGaussianBlur>
| <svg feImage>
| <svg feMerge>
| <svg feMergeNode>
| <svg feMorphology>
| <svg feOffset>
| <svg fePointLight>
| <svg feSpecularLighting>
| <svg feSpotLight>
| <svg feTile>
| <svg feTurbulence>
| <svg foreignObject>
| <svg glyphRef>
| <svg linearGradient>
| <svg radialGradient>
| <svg textPath>
#data
<!DOCTYPE html><body><svg><altglyph /><altglyphdef /><altglyphitem /><animatecolor /><animatemotion /><animatetransform /><clippath /><feblend /><fecolormatrix /><fecomponenttransfer /><fecomposite /><feconvolvematrix /><fediffuselighting /><fedisplacementmap /><fedistantlight /><feflood /><fefunca /><fefuncb /><fefuncg /><fefuncr /><fegaussianblur /><feimage /><femerge /><femergenode /><femorphology /><feoffset /><fepointlight /><fespecularlighting /><fespotlight /><fetile /><feturbulence /><foreignobject /><glyphref /><lineargradient /><radialgradient /><textpath /></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg altGlyph>
| <svg altGlyphDef>
| <svg altGlyphItem>
| <svg animateColor>
| <svg animateMotion>
| <svg animateTransform>
| <svg clipPath>
| <svg feBlend>
| <svg feColorMatrix>
| <svg feComponentTransfer>
| <svg feComposite>
| <svg feConvolveMatrix>
| <svg feDiffuseLighting>
| <svg feDisplacementMap>
| <svg feDistantLight>
| <svg feFlood>
| <svg feFuncA>
| <svg feFuncB>
| <svg feFuncG>
| <svg feFuncR>
| <svg feGaussianBlur>
| <svg feImage>
| <svg feMerge>
| <svg feMergeNode>
| <svg feMorphology>
| <svg feOffset>
| <svg fePointLight>
| <svg feSpecularLighting>
| <svg feSpotLight>
| <svg feTile>
| <svg feTurbulence>
| <svg foreignObject>
| <svg glyphRef>
| <svg linearGradient>
| <svg radialGradient>
| <svg textPath>
#data
<!DOCTYPE html><BODY><SVG><ALTGLYPH /><ALTGLYPHDEF /><ALTGLYPHITEM /><ANIMATECOLOR /><ANIMATEMOTION /><ANIMATETRANSFORM /><CLIPPATH /><FEBLEND /><FECOLORMATRIX /><FECOMPONENTTRANSFER /><FECOMPOSITE /><FECONVOLVEMATRIX /><FEDIFFUSELIGHTING /><FEDISPLACEMENTMAP /><FEDISTANTLIGHT /><FEFLOOD /><FEFUNCA /><FEFUNCB /><FEFUNCG /><FEFUNCR /><FEGAUSSIANBLUR /><FEIMAGE /><FEMERGE /><FEMERGENODE /><FEMORPHOLOGY /><FEOFFSET /><FEPOINTLIGHT /><FESPECULARLIGHTING /><FESPOTLIGHT /><FETILE /><FETURBULENCE /><FOREIGNOBJECT /><GLYPHREF /><LINEARGRADIENT /><RADIALGRADIENT /><TEXTPATH /></SVG>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg altGlyph>
| <svg altGlyphDef>
| <svg altGlyphItem>
| <svg animateColor>
| <svg animateMotion>
| <svg animateTransform>
| <svg clipPath>
| <svg feBlend>
| <svg feColorMatrix>
| <svg feComponentTransfer>
| <svg feComposite>
| <svg feConvolveMatrix>
| <svg feDiffuseLighting>
| <svg feDisplacementMap>
| <svg feDistantLight>
| <svg feFlood>
| <svg feFuncA>
| <svg feFuncB>
| <svg feFuncG>
| <svg feFuncR>
| <svg feGaussianBlur>
| <svg feImage>
| <svg feMerge>
| <svg feMergeNode>
| <svg feMorphology>
| <svg feOffset>
| <svg fePointLight>
| <svg feSpecularLighting>
| <svg feSpotLight>
| <svg feTile>
| <svg feTurbulence>
| <svg foreignObject>
| <svg glyphRef>
| <svg linearGradient>
| <svg radialGradient>
| <svg textPath>
#data
<!DOCTYPE html><body><math><altGlyph /><altGlyphDef /><altGlyphItem /><animateColor /><animateMotion /><animateTransform /><clipPath /><feBlend /><feColorMatrix /><feComponentTransfer /><feComposite /><feConvolveMatrix /><feDiffuseLighting /><feDisplacementMap /><feDistantLight /><feFlood /><feFuncA /><feFuncB /><feFuncG /><feFuncR /><feGaussianBlur /><feImage /><feMerge /><feMergeNode /><feMorphology /><feOffset /><fePointLight /><feSpecularLighting /><feSpotLight /><feTile /><feTurbulence /><foreignObject /><glyphRef /><linearGradient /><radialGradient /><textPath /></math>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math altglyph>
| <math altglyphdef>
| <math altglyphitem>
| <math animatecolor>
| <math animatemotion>
| <math animatetransform>
| <math clippath>
| <math feblend>
| <math fecolormatrix>
| <math fecomponenttransfer>
| <math fecomposite>
| <math feconvolvematrix>
| <math fediffuselighting>
| <math fedisplacementmap>
| <math fedistantlight>
| <math feflood>
| <math fefunca>
| <math fefuncb>
| <math fefuncg>
| <math fefuncr>
| <math fegaussianblur>
| <math feimage>
| <math femerge>
| <math femergenode>
| <math femorphology>
| <math feoffset>
| <math fepointlight>
| <math fespecularlighting>
| <math fespotlight>
| <math fetile>
| <math feturbulence>
| <math foreignobject>
| <math glyphref>
| <math lineargradient>
| <math radialgradient>
| <math textpath>
#data
<!DOCTYPE html><body><svg><solidColor /></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg solidcolor>

Some files were not shown because too many files have changed in this diff Show More