The new version of the Module:Citation/CS1 suite deprecates |dead-url=
and |deadurl=
because these two parameters violate the nominal standard that says that parameters ending in -url
hold a url as a value.
Wikitext | {{cite book
|
---|---|
Live | Title. Archived from the original on 2015-05-19. {{cite book}} : Unknown parameter |dead-url= ignored (|url-status= suggested) (help)
|
Sandbox | Title. Archived from the original on 2015-05-19. {{cite book}} : Unknown parameter |dead-url= ignored (|url-status= suggested) (help)
|
Wikitext | {{cite book
|
---|---|
Live | Title. Archived from the original on 2015-05-19. |
Sandbox | Title. Archived from the original on 2015-05-19. |
The purpose of task 16 is to replace various combinations of |dead-url=
and |deadurl=
and their associated keywords with |url-status=
and its appropriate keywords.
description
|dead-url=
and |deadurl=
accept a limited set of keywords that control the rendering of cs1|2 citation templates that have archive urls. The keywords that concern this task are:
yes
,y
,true
,no
The remaining keywords retain their meaning and purpose:
unfit
,usurped
,bot: unknown
Because |url-status=no
and |url-status=yes
(and the other 'positive' keywords) are nonsensical, live
(replacing no
) and dead
(replacing yes
...) have been assigned to this parameter.
Task 16 searches for templates that use either of the |dead-url=
and |deadurl=
parameters (with or without assigned keyword) and then:
- renames the parameter to
url-status
- replaces the assigned keyword no with live, and replaces the assigned keywords yes, y, and true with dead; the keywords unfit, usurped, and bot: unknown are retained
- deletes all empty parameters (except that an empty |url-status= parameter is retained, unfilled, when |archive-url= is present and has a value)
|dead-url=
and |deadurl=
without an assigned keyword are intentionally included in this process so that the deprecated, and ultimately unsupported, parameters don't linger in article space.
edit summaries
Task 16 writes an edit summary message that tallies the number of replacements and the number of deletions. The message has the form:
- replaced (n×) / removed (n×) deprecated |dead-url= and |deadurl= with |url-status=;
the edit summary has a link to this page.
ancillary tasks
Deletes all empty parameters from templates that are repaired.
This task does not do awb general fixes.
script
// remove, replace |deadurl= and |dead-url= with |url-status=
// when assigned value is 'yes', 'y', 'true', replace assigned value with 'dead'
// when assigned value is 'no', replace assigned value with 'live'
// when empty, and when |archive-url= is empty, delete
// when empty, and when |archive-url= has a value, retain but do not fill
// use Wikisearch: insource:/\| *dead\-?url *= *[^\|\}]/
// ProcessArticle: rewrites every cs1|2 citation template in ArticleText that uses the deprecated
// |dead-url= or |deadurl= parameter.
//
//   ArticleText    the wikitext to process; the (possibly modified) wikitext is returned
//   ArticleTitle   not used by this task
//   wikiNamespace  not used by this task
//   Summary        (out) edit summary, or the reason the page was skipped
//   Skip           (out) true when nothing was changed: no deprecated parameter was found, or every
//                  candidate template was abandoned
//
// For each matching template the function:
//   1. strips the signature comments of DASHBot and H3llBot and the do-nothing |postscript=<!--None-->
//   2. abandons the template (returns it untouched) if any other html comment remains
//   3. renames the parameter to |url-status= and translates yes/y/true -> dead, no -> live
//   4. removes all empty parameters, except an empty |url-status= when |archive-url= has a value
//
// A template counts as 'removed' when no url-status text remains after step 4, otherwise as 'replaced'.
public string ProcessArticle(string ArticleText, string ArticleTitle, int wikiNamespace, out string Summary, out bool Skip)
{
    Skip = false;                                   // for development, never skip; for the bot set this true then when fixes are made, set it false
    string IS_CS1 = @"(?:[Cc]ite[_\-\s]*(?=(?:AV [Mm]edia(?: notes)?)|album\-notes|[Aa][Vv] media|[Aa][Vv] media notes|article|ar[Xx]iv|audio|biorxiv|blog|book|chapter|conference|contribution|dictionary|dissertation|document|DVD|dvd|encyclopa?edia|episode|iucn|image|interview|[Jj]ournal|letter|liner notes|[Mm]agazine|mailing ?list|manual|map|media release|media|newsgroup|newspaper|(?:[Nn]ews(?!group|paper))|[Nn]ew|paper|plaque|podcast|press release|press|publication|pr|radio|report|serial|sign|speech|techreport|thesis|video|url|wb|[Ww]eb|[Ww]ork|act|[Hh]ansard|periodical)|[Cc]itation|[Cc]ite(?=\s*\|)|AIOH|[Cc]it news|[Cc]it web|[Cc]ita web|[Cc]itar notícia|[Cc]itat web|[Cc]ite DANFS|[Cc]ite [Ss]ports\-[Rr]eference|[Cc]ite tweet|[Cc]ite we|[Cc]ite vob|[Cc]w|eFloras|Gilliland|PFAF|PLANTS|SA Rugby Article|Silvics|[Ww]eb cite)";

    int fixed_count = 0;                            // templates where |url-status= remains after the fix
    int deleted_count = 0;                          // templates where the parameter was removed entirely
    int comment_skip = 0;                           // templates abandoned because they hold an html comment
    bool found = false;                             // true once any candidate template has been seen

    //---------------------------< S T A R T >--------------------------------------------------------------------

    ArticleText = hide (ArticleText, IS_CS1);       // hide all templates that aren't cs1 templates & hide wikilinks

    //---------------------------< R E N A M E   D E A D U R L >--------------------------------------------------
    //
    // renames |deadurl= and |dead-url= to |url-status=; replace assigned values.  Empty |deadurl= and |dead-url=
    // are changed and then deleted because we don't want to leave the deprecated parameter in articles to be copied
    // and 'filled in' by well meaning editors.
    //
    // When |archive-url= has a value and |url-status= is present but empty, leave |url-status= in place.
    //

    // cs1|2 template has one of |dead-url= or |deadurl= with or without a value
    string template_pattern = @"\{\{\s*" + IS_CS1 + @"[^}]*\|\s*dead\-?url\b[^\}]+\}\}";

    ArticleText = Regex.Replace(ArticleText, template_pattern,
        delegate(Match match)
        {
            string raw_template = match.Groups[0].Value;    // the whole citation template; returned as-is if we can't fix it
            string fixed_template;                          // a fixed citation template is assembled here

            found = true;

            //----------
            // DASHBot and H3llBot are long-retired bots; delete their signatures
            fixed_template = Regex.Replace (raw_template, @"\<!\-\- *(?:Added by DASHBot|DASH[Bb]ot\.?) *\-\->", "");
            fixed_template = Regex.Replace (fixed_template, @"\<!\-\- *Set by H3llBot *\-\->", "");
            // |postscript=<!--None--> does nothing; delete the comment, let empty_param_remove() finish the job
            fixed_template = Regex.Replace (fixed_template, @"(\| *postscript *= *)\<!\-\- *[Nn]one *\-\->", "$1");
            //----------

            if (Regex.IsMatch (fixed_template, @"\<!\-\-"))  // any other html comment: abandon this template
            {
                comment_skip++;
                return raw_template;
            }

            // Replace the parameter name.  \s* (not just spaces) between the pipe and the name keeps this in step
            // with template_pattern above; otherwise a template laid out with the pipe at the end of the previous
            // line would be detected but never renamed, and then wrongly tallied as 'removed'.
            fixed_template = Regex.Replace (fixed_template, @"(\|\s*)dead\-?url\b", "$1url-status");

            // Translate the keywords.  The patterns are anchored to the parameter's own pipe so that text such as
            // 'url-status=yes' inside a url query string is not rewritten.
            fixed_template = Regex.Replace (fixed_template,
                @"(\|\s*url\-status\s*=\s*)(?:\b[Yy][Ee][Ss]\b|\b[Yy]\b|\b[Tt][Rr][Uu][Ee]\b)", "$1dead");    // 'yes', 'y', 'true' -> 'dead'
            fixed_template = Regex.Replace (fixed_template,
                @"(\|\s*url\-status\s*=\s*)\b[Nn][Oo]\b", "$1live");                                          // 'no' -> 'live'

            if (Regex.IsMatch (fixed_template, @"\|\s*archive\-?url\s*=\s*[/\w]"))    // if |archive-url= is present and has a value
            {
                // protect an empty |url-status= from empty_param_remove() by giving it a temporary secret word
                fixed_template = Regex.Replace (fixed_template, @"(\|\s*url\-status\s*=)(\s*[\|\}])", "$1__3MP7Y__$2");
            }

            fixed_template = empty_param_remove (fixed_template);              // remove all empty parameters from this template
            fixed_template = Regex.Replace (fixed_template, @"__3MP7Y__", ""); // remove the secret word

            if (Regex.IsMatch (fixed_template, @"url\-status"))
                fixed_count++;                          // parameter survived: it was replaced
            else
                deleted_count++;                        // parameter is gone: it was removed

            return fixed_template;
        });

    //---------------------------< F I N I S H >------------------------------------------------------------------

    ArticleText = unhide (ArticleText);             // unhide all that is hidden

    if (found)                                      // if |dead-url= or |deadurl= found
    {
        if ((0 == deleted_count) && (0 == fixed_count))     // none were fixed
        {
            if (0 != comment_skip)                          // if skipped because of comments
                Summary = @"Comment skip (" + comment_skip + @"×)";     // say how many
            else
                Summary = @"Skipped for unknown reason";    // say that we don't know why we didn't fix
            Skip = true;
        }
        else
        {
            Summary = "[[User:Monkbot/task 16: remove replace deprecated dead-url params|Task 16]]:";
            Summary = Summary + @" replaced (" + fixed_count + @"×) / removed (" + deleted_count + @"×) deprecated |dead-url= and |deadurl= with |url-status=;";
        }
    }
    else                                            // here when cs1|2 templates don't have any |dead-url= or |deadurl= params
    {
        Summary = @"no dead-url params";
        Skip = true;
    }

    return ArticleText;
}
//===========================<< S U P P O R T >>==============================================================
//---------------------------< H I D E >----------------------------------------------------------------------
//
// HIDE TEMPLATES: find templates whose name does not match <dont_hide> and that contain no nested braces;
// replace the opening {{ with __0P3N__, the closing }} with __CL0S3__, and internal | (pipes) with __P1P3__
//
// single curly braces in urls and other parameter values can confuse other regex in this code so replace {
// with __0CU!21Y__ and } with __CCU!21Y__
//
// HIDE WIKILINKS: [[title|label]] and [[title]] have their brackets (and pipe) replaced with __WL1NK_O__,
// __P1P3__ and __WL1NK_C__.
//
// HIDE KNOWN COMMENTS: three well-known html comments are replaced with placeholder words.
//
//   ArticleText   the wikitext to process
//   dont_hide     regex fragment, spliced into a negative lookahead, naming the templates that must stay visible
//   returns       the wikitext with the markup described above replaced by placeholder words
//
private string hide (string ArticleText, string dont_hide)
{
    // a template that is not <dont_hide> and has no braces inside it
    string template_pattern = @"\{\{(?!\s*" + dont_hide + @")[^\{\}]*\}\}";

    ArticleText = Regex.Replace(ArticleText, template_pattern,
        delegate(Match match)
        {
            string hidden = match.Groups[0].Value;                      // the whole template
            hidden = Regex.Replace (hidden, @"\{\{", "__0P3N__");       // hide the opening {{
            hidden = Regex.Replace (hidden, @"\}\}", "__CL0S3__");      // hide the closing }}
            hidden = Regex.Replace (hidden, @"\|", "__P1P3__");         // and hide the pipes
            return hidden;
        });

    // Single curly braces.  Lookarounds are used so that the neighbouring characters are tested but not
    // consumed; a consuming pattern would miss the second of two single braces that are separated by
    // only one character (e.g. '{a{') and any single brace at the very start or end of the text.
    ArticleText = Regex.Replace(ArticleText, @"(?<!\{)\{(?!\{)", "__0CU!21Y__");    // single opening curly brace
    ArticleText = Regex.Replace(ArticleText, @"(?<!\})\}(?!\})", "__CCU!21Y__");    // single closing curly brace

    // HIDE complex wikilinks: [[article title|label]] to __WL1NK_O__article title__P1P3__label__WL1NK_C__
    // ([[File: and [[Image: links are left alone here; with wikilinks inside they can be confusing)
    ArticleText = Regex.Replace(ArticleText, @"\[\[(?![Ff]ile|[Ii]mage)([^\|\]]+)\|([^\]]+)\]\]", "__WL1NK_O__$1__P1P3__$2__WL1NK_C__");

    // HIDE simple wikilinks: [[article title]] to __WL1NK_O__article title__WL1NK_C__
    ArticleText = Regex.Replace(ArticleText, @"\[\[([^\]]+)\]\]", "__WL1NK_O__$1__WL1NK_C__");

    ArticleText = Regex.Replace (ArticleText, @"\<!\-\- *Bot[\- ]generated title *\-\->", "__B07_G3N_717L3__");         // bot generated title comment
    ArticleText = Regex.Replace (ArticleText, @"\<!\-\- *Staff writer\(s\); no by-line\. *\-\->", "__574FF_WR173R5__"); // staff writers
    ArticleText = Regex.Replace (ArticleText, @"\<!\-\- *Not stated *\-\->", "__N07_57473D__");                         // not stated

    return ArticleText;
}
//---------------------------< U N H I D E >------------------------------------------------------------------
//
// UNHIDE: the inverse of the placeholder substitution; every placeholder word found in ArticleText is
// replaced with the wiki markup it stands for.  The three comment placeholders are always written back in
// the fixed spellings listed in the table below.
//
//   ArticleText   wikitext that may contain placeholder words
//   returns       the wikitext with all placeholder words replaced by markup
//
private string unhide (string ArticleText)
{
    // { placeholder word, markup it stands for } -- applied in this order
    string[][] restorations = new string[][]
    {
        new string[] { @"__N07_57473D__",    "<!--Not stated-->" },
        new string[] { @"__574FF_WR173R5__", "<!--Staff writer(s); no by-line.-->" },
        new string[] { @"__B07_G3N_717L3__", "<!-- Bot generated title -->" },
        new string[] { @"__WL1NK_O__",       "[[" },
        new string[] { @"__WL1NK_C__",       "]]" },
        new string[] { @"__P1P3__",          "|" },
        new string[] { @"__0CU!21Y__",       "{" },
        new string[] { @"__CCU!21Y__",       "}" },
        new string[] { @"__0P3N__",          "{{" },
        new string[] { @"__CL0S3__",         "}}" }
    };

    foreach (string[] restoration in restorations)
    {
        ArticleText = Regex.Replace(ArticleText, restoration[0], restoration[1]);
    }

    return ArticleText;
}
//---------------------------< E M P T Y _ P A R A M _ R E M O V E >------------------------------------------
//
// This function removes all empty named parameters from a template, attempting to leave what remains the same form.
//
// this is a multi-step process that attempts to handle most of the vagaries of how templates are written in
// wikitext.  In general there are three basic 'styles': horizontal – all parameters written on a single
// line of text, vertical – all parameter written singly one-to-a-line, and a mix of the two – multiple lines
// where each has one or more parameters.
//
//	1. where the parameter name & '=' are on one line and the value on a following line, put the value on the same line as the '='
//	2. for mixed, when empties are followed by new line; remove the empty but leave the newline
//	3. for any, empties are followed by pipe closing }; remove the empty but leave the | or }
//	4. the preceding steps can leave blank lines; remove the blank lines
//
// A parameter name is matched with [^=\|\}]+ so that a match can never run across a pipe: an unnamed
// (positional) parameter that precedes an empty named parameter must not be swallowed along with it.
//
//   template   the wikitext of a single template
//   returns    the template with its empty named parameters and resulting blank lines removed
//
private string empty_param_remove (string template)
{
    string pattern = @"(\|[^=\|\}]+=[ \t]*)[\r\n]+(?!\s*[\|\}])";      // parameter name & '=' on one line, value on a following line
    while (Regex.IsMatch(template, pattern))                            // put them on the same line
        template = Regex.Replace(template, pattern, "$1");

    pattern = @"\|[^=\|\}]+=[ \t]*([\r\n]+)";                           // empty followed by new line
    while (Regex.IsMatch(template, pattern))                            // repeat: removing one empty can expose another
        template = Regex.Replace(template, pattern, "$1");

    pattern = @"\|[^=\|\}]+=\s*([\|\}])";                               // empty followed by pipe or at end of template
    while (Regex.IsMatch(template, pattern))                            // repeat: the match consumes the next parameter's pipe
        template = Regex.Replace(template, pattern, "$1");

    // close up multiple new lines.  A line break is matched as a unit (\r?\n) so that a single CRLF is
    // never mistaken for two consecutive line breaks and reduced to a bare CR.
    pattern = @"(\r?\n)(?:[ \t]*\r?\n)+";
    while (Regex.IsMatch(template, pattern))
        template = Regex.Replace(template, pattern, "$1");

    return template;
}
//Monkbot_task_16_remove_replace_deprecated_dead-url_params.cs