(* Definitions of functions *)
(* Previously defined variables in html.mx *)
(* stringtrim and porder are cleared here and are defined again further down in this file,
   so no earlier definitions stay attached to these two symbols. *)
ClearAll[stringtrim, porder];
(* printf[x]: string form of the numerical value of x, written with NumberForm
   at 11 digits of precision. ExponentFunction -> (Null &) turns off scientific notation. *)
printf[value_] := Module[{shown},
  shown = NumberForm[N[value], 11, ExponentFunction -> (Null &)];
  ToString[shown]
];
(* printstop[parts...]: prints "Fatal error: " followed by all arguments, then asks
   through Input (default answer True) whether to exit; the kernel exits only when the
   answer evaluates to True. Returns Null otherwise. *)
printstop[parts__] := Module[{question, answer},
  Print["Fatal error: ", parts];
  (* Only the first argument is repeated in the prompt. *)
  question = ToString[First[{parts}]] <> " --- Exiting? (True or False)";
  answer = Input[question, True];
  If[answer, Exit[]];
];
(* printnames[data]: prints data as a TableForm, but only while the global flag
   ifPrintNames is exactly True. Always returns Null. *)
printnames[table_] := (
  If[TrueQ[ifPrintNames], Print[TableForm[table]]];
);
(* checkdir[dir]: calls printstop when dir is not a string or does not name an
   existing directory; does nothing otherwise. *)
checkdir[dir_] :=
  Which[
    Head[dir] =!= String,
      printstop["checkdir: " <> ToString[InputForm[dir]] <> " not a string."],
    FileType[dir] =!= Directory,
      (printstop["checkdir: " <> dir <> " not a directory."];)
  ];
(* checkdir1[dir]: like checkdir, but a missing directory is created (with a
   "Creating directory" message) instead of being treated as a fatal error.
   A non-string argument is reported through printstop.
   Returns the result of CreateDirectory when the directory had to be created,
   Null when it already exists. *)
checkdir1[dir_] :=
  If[Head[dir] =!= String,
    (* The message names this function, so the failing call can be located. *)
    printstop["checkdir1: " <> ToString[InputForm[dir]] <> " not a string."],
    If[!DirectoryQ[dir],
      Print["Creating directory ", dir];
      CreateDirectory[dir]]];
(* checkfile[file]: calls printstop when file is not a string or does not name an
   existing regular file; does nothing otherwise. *)
checkfile[file_] :=
  Which[
    Head[file] =!= String,
      printstop["checkfile: " <> ToString[InputForm[file]] <> " not a string."],
    FileType[file] =!= File,
      (printstop["checkfile: " <> file <> " not a file."];)
  ];
(* silentrun[command]: writes command into mathcommand.bat inside <dirwork>/temp and
   opens linkcommand.lnk from the same directory. Per the original note, the .lnk is
   expected to be a shortcut to the batch file with "Run minimized" set.
   Both files must already exist (checkfile); the directory is created when missing.
   The working directory is restored afterwards. Returns Null. *)
silentrun[command_String] := Module[{workdir, script, shortcut},
  {script, shortcut} = {"mathcommand.bat", "linkcommand.lnk"};
  workdir = ToFileName[{dirwork, "temp"}];
  checkdir1[workdir];
  SetDirectory[workdir];
  (* The names above are relative, so they are resolved inside workdir. *)
  Scan[checkfile, {script, shortcut}];
  Export[script, command, "Text"];
  SystemOpen[shortcut];
  ResetDirectory[];
];
(* wget[url, outfile, logfile]: downloads url into outfile with the external wget
   program and returns the downloaded content as text.
   - outfile and the temporary wget log are created empty first, so an unwritable
     location is caught by checkfile before wget is started.
   - "%" in the URL is doubled (see the DOS note below).
   - logfile is currently not used: the code that appended the wget log to it is
     disabled (kept below as a comment).
   A warning is printed when the result is not a string of at least 10 characters. *)
wget[url_String, outfile_String, logfile_String] := Module[{useragent, tempfile, url1, command, text},
  useragent = "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.8.1.9) Gecko/20071025 Firefox/2.0.0.9";
  tempfile = "e:\\temp\\current.log";
  Export[outfile, "", "Text"];
  checkfile[outfile];
  Export[tempfile, "", "Text"];
  checkfile[tempfile];
  (* DOS operating system interpretation of % *)
  url1 = StringReplace[url, "%" -> "%%"];
  command = "wget --no-check-certificate -d -t 30 -U \"" <> useragent <>
    "\" -O \"" <> outfile <>
    "\" -o \"" <> tempfile <>
    "\" \"" <> url1 <> "\"";
  (* Run returns only after the command has finished, so no polling is needed.
     The former loop waited for tempfile to disappear, but nothing deletes it any
     more, so every call was stalled for the full 5000 x 0.2 s. *)
  Run[command];
  (*
  line20 = "echo --------------- >> \"" <> logfile <> "\"";
  line2 = "type \"" <> tempfile <> "\" >> \"" <> logfile <> "\"";
  line3 = "del \"" <> tempfile <> "\"";
  silentrun[command <> "\n" <> line20 <> "\n" <> line2 <> "\n" <> line3];
  *)
  text = Import[outfile, "Text"];
  (* Import may fail and return a non-string; report that as well instead of
     letting StringLength complain. *)
  If[!StringQ[text] || StringLength[text] < 10,
    Print["Warning: wget: output is too short: ", text]];
  text];
(* proxify[url]: for a URL starting with http://host/ inserts ".6a.nl" after the host
   part; any other string is returned unchanged. *)
proxify[address_String] := Module[{hostPart},
  hostPart = RegularExpression["^http://([^/]+)/"];
  StringReplace[address, hostPart -> "http://$1.6a.nl/"]
];
(* progname[f]: returns the text of the first "(* ... *)" fragment found in file f,
   or "Unnamed" when there is none. The result is also left in the global symbol pname.
   Prints an error and exits the kernel when f is not an existing file. *)
progname[f_String] := (
  If[FileType[f] =!= File,
    Print["Error: progname: File ", f, " does not exist. Exiting ..."];
    Exit[]];
  pname =
    StringCases[Import[f, "Text"],
      RegularExpression["[(][*]\\s*(.*)\\s*[*][)]"] :> "$1"];
  pname = If[pname === {}, "Unnamed", First[pname]]
);
(* toslash[s]: s with every backslash replaced by a forward slash. *)
toslash[path_String] := StringReplace["\\" -> "/"][path];
(* tobackslash[s]: s with every forward slash replaced by a backslash. *)
tobackslash[path_String] := StringReplace["/" -> "\\"][path];
(* stringtrim[s]: replaces every run of whitespace in s by a single space and then
   removes the space left at the start and at the end, if any. *)
stringtrim[s_String] := Module[{collapsed},
  collapsed = StringReplace[s, Whitespace -> " "];
  StringReplace[collapsed,
    {StartOfString ~~ " " -> "", " " ~~ EndOfString -> ""}]
];
(* flankblank[s]: s without leading and trailing whitespace; inner whitespace is kept. *)
flankblank[text_String] := Module[{edges},
  edges = RegularExpression["(^\\s+|\\s+$)"];
  StringReplace[text, edges -> ""]
];
(* p[s]: writes the string s to the global output stream of.
   Any non-string argument is only reported with an error message. *)
p[line_String] := Write[of, line];
p[other_] := Print["Error: function p[]: attempt to print non-string: ", other];
(* Adding or removing special HTML-symbols *)
(* addsymb[s]: escapes s for use in HTML. Runs of whitespace become a single space and
   the special characters are replaced by character entities.
   NOTE(review): the entity strings had been decoded to the characters themselves
   (the rules were identity maps and the rule for the double quote no longer parsed).
   The entities below are reconstructed; in particular "_" -> "&nbsp;" and the use of
   "&#39;" for the apostrophe should be confirmed against the original source. *)
addsymb[s_String] :=
  StringReplace[s, {
    Whitespace -> " ",
    "<" -> "&lt;",
    ">" -> "&gt;",
    "&" -> "&amp;",
    "\"" -> "&quot;",
    "'" -> "&#39;",
    "_" -> "&nbsp;"
  }];
(* removesymb[s]: replaces each HTML character entity listed below by a space.
   NOTE(review): the entity strings had been decoded to the characters themselves
   (which also left a rule that no longer parsed); they are reconstructed here.
   Both spellings of the apostrophe entity are accepted. Confirm the list against
   the original source. *)
removesymb[s_String] :=
  StringReplace[s, {
    "&lt;" -> " ",
    "&gt;" -> " ",
    "&amp;" -> " ",
    "&quot;" -> " ",
    "&#39;" -> " ",
    "&apos;" -> " ",
    "&nbsp;" -> " "
  }];
(* removesymb1[s]: replaces by a space each HTML character entity listed below, each
   newline, and each of the raw characters < > & " ' _.
   The entity rules come first so that "&lt;" is consumed as a whole rather than
   through the rule for a bare "&".
   NOTE(review): the entity strings had been decoded to the characters themselves
   (producing duplicate rules and one that no longer parsed); they are reconstructed
   here and should be confirmed against the original source. *)
removesymb1[s_String] :=
  StringReplace[s, {
    "&lt;" -> " ",
    "&gt;" -> " ",
    "&amp;" -> " ",
    "&quot;" -> " ",
    "&#39;" -> " ",
    "&apos;" -> " ",
    "&nbsp;" -> " ",
    "\n" -> " ",
    "<" -> " ",
    ">" -> " ",
    "&" -> " ",
    "\"" -> " ",
    "'" -> " ",
    "_" -> " "
  }];
(* shrink[s0, cleft, cright, smiddle]: shortens a long string to roughly cleft leading
   and cright trailing characters with smiddle in between.
   - A non-string s0 gives "".
   - Runs of whitespace are first replaced by single spaces; if the result is shorter
     than cleft + cright + 2 it is returned as is.
   - Half of the length of smiddle is taken off each side's budget, and each side is
     cut at a word boundary when one lies inside its budget.
   - The text "WORDS" inside smiddle is replaced by "", "1 word" or "<n> words",
     where n counts the whitespace-delimited words left out. *)
shrink[s0_, cleft_Integer, cright_Integer, smiddle_String] := "";
shrink[s0_String, cleft_Integer, cright_Integer, smiddle_String] :=
  Module[{s, slen, ctot, sp, dc, cl, cr, spleft, spright, sleft, sright,
      nwords, words},
    (* spleft is now a local; it used to be misspelled in this list and leaked as a global. *)
    s = StringReplace[s0, Whitespace -> " "];
    slen = StringLength[s];
    ctot = cleft + cright;
    If[ctot + 2 > slen, Return[s]];
    (* Positions of the words that have whitespace on both sides. *)
    sp = StringPosition[s, RegularExpression["\\s\\S+\\s"]];
    dc = Floor[StringLength[smiddle]/2];
    cl = Max[cleft - dc, 0];
    cr = Max[cright - dc, 0];
    (* Words lying completely inside the left / right budget. *)
    spleft = Select[sp, #[[2]] <= cl &];
    spright = Select[sp, #[[1]] >= slen - cr + 1 &];
    sleft =
      If[spleft === {}, StringTake[s, cl],
        StringTake[s, Last[spleft][[2]]]];
    sright =
      If[spright === {}, StringTake[s, -cr],
        StringDrop[s, First[spright][[1]] - 1]];
    nwords = Length[sp] - Length[spleft] - Length[spright];
    words =
      If[nwords == 0, "",
        If[nwords == 1, "1 word", ToString[nwords] <> " words"]];
    sleft <> StringReplace[smiddle, "WORDS" -> words] <> sright
  ];
(* Transformation x->0x: the string form of s0, with one "0" put in front when it is
   shorter than two characters. *)
fixed2[s0_] := With[{digits = ToString[s0]},
  If[StringLength[digits] < 2, "0" <> digits, digits]
];
(* Transformation x->00x, xx->0xx: the string form of s0, with a "0" put in front up to
   two times while it is shorter than three characters. *)
fixed3[s0_] :=
  Nest[If[StringLength[#] < 3, "0" <> #, #] &, ToString[s0], 2];
(* stripext[s]: s without its last four characters.
   NOTE(review): presumably meant for a dot plus a three-letter extension - confirm. *)
stripext[filename_String] := StringDrop[filename, -4];
(* porder[s1, s2]: True when s1 and s2 are in order judged by their first three
   characters only; shorter strings are padded on the right with "0". *)
porder[s1_String, s2_String] := Module[{key},
  key = StringTake[# <> "000", 3] &;
  OrderedQ[{key[s1], key[s2]}]
];
(* ifprocessed[{dir, file}]: True when <dir0>/processed/img<dir>/<name>.jpg exists as
   a file, where <name> is file with "dsc", "_" and "-" removed.
   The answer is stored as a definition for this photo, so the file system is
   queried once per photo. *)
ifprocessed[photo_] := Module[{folder, stem, target},
  {folder, stem} = photo;
  stem = StringReplace[stem, {"dsc" -> "", "_" -> "", "-" -> ""}];
  target = ToFileName[{dir0, "processed", "img" <> folder}, stem <> ".jpg"];
  ifprocessed[photo] = (FileType[target] === File)
];
(* hex[c]: two-digit upper-case hexadecimal string for Round[255. c]. *)
hex[c_] := Module[{symbols, digits},
  symbols = Characters["0123456789ABCDEF"];
  (* Two base-16 digits, each in 0..15; +1 turns them into list positions. *)
  digits = IntegerDigits[Round[255. c], 16, 2];
  StringJoin[symbols[[digits + 1]]]
];
(* shortName[name]: reduces name to its first two space-separated words after
   replacing " x " (the \[Times] sign between spaces) by a space and deleting
   everything in angle brackets. The result must match the global pattern matchName2;
   otherwise a warning is printed and "" is returned. *)
shortName[name_String] := Module[{candidate},
  candidate = StringReplace[name, " \[Times] " -> " "];
  candidate = StringReplace[candidate, "<" ~~ Shortest[__] ~~ ">" -> ""];
  candidate = StringReplace[candidate, RegularExpression["(.*? .*?) .*"] -> "$1"];
  If[StringMatchQ[candidate, matchName2],
    candidate,
    Print["shortName: Warning: name ", name, " cannot be shortened."];
    ""]
];
(* Recently added *)
(* Removing HTML tags from the string: every "<...>" with at most 555 characters
   between the brackets (newlines included) is deleted, then the result is passed
   through stringtrim. *)
removetags[s_String] := Module[{tag},
  tag = RegularExpression["(?i)(?s)<.{0,555}?>"];
  stringtrim[StringReplace[s, tag -> ""]]
];
(* Saving a web page to a file, if a certain condition is satisfied.
   urlsave[url, file, condition]:
   - does nothing when file already exists, or when file names a directory that
     does not exist (an error is printed in the latter case);
   - otherwise downloads url to file; the file is deleted again when the status code
     is not 200 or when condition[text] is not True for the downloaded text.
   Returns {resp, text}: the URLDownload result and the imported text, each "" when
   that step was not reached. *)
urlsave[url_String, file_String, condition_] :=
  Module[{resp, text, useragent, requestheaders, statusCode},
    (* useragent, requestheaders and statusCode are locals now; they used to be
       assigned as globals on every call. *)
    {resp, text} = {"", ""};
    If[FileNameTake[file] =!= file && FileType[DirectoryName[file]] =!= Directory,
      Print["Error: urlsave[", {url, file, condition}, "]: directory for the file does not exist."],
      If[FileType[file] =!= File(* && failedAttempts[nYear] < failedAttemptsM*),
        (* main body *)
        Print["Saving URL ", url, " to the file ", file];
        useragent = "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.8.1.9) Gecko/20071025 Firefox/2.0.0.9";
        (* requestheaders = If[ValueQ[requestHeaders], requestHeaders, <|"UserAgent" -> useragent|>]; *)
        (* For some unknown reason, since Jan. 19, 2020 it requires non-empty Username and Password *)
        requestheaders = <|"UserAgent" -> useragent, "Username" -> "aa", "Password" -> "bb"|>;
        resp = URLDownload[HTTPRequest[url, requestheaders], file, {"Headers", "StatusCode"}];
        (* A failed download does not return rules; treat it like a bad status. *)
        statusCode =
          If[AssociationQ[resp] || MatchQ[resp, {___Rule}],
            "StatusCode" /. resp,
            $Failed];
        (* =!= instead of != so that a non-numeric status is also handled. *)
        If[statusCode =!= 200,
          Print["Warning: URL = ", url, ": bad response: ", resp];
          (* failedAttempts[nYear] = failedAttempts[nYear] + 1;
          If[failedAttempts[nYear] == failedAttemptsM,
          Print["Warning: maximum download failed attempts reached for the tag: ", nYear]];*)
          If[FileType[file] === File,
            Print["File ", file, " will be deleted!"];
            Print[Short[Import[file, "Text"], 10]];
            DeleteFile[file];];];
        If[FileType[file] === File,
          text = Import[file, "TEXT"];
          If[!condition[text],
            Print["Warning: URL = ", url, ": file ", file, " will be deleted because the condition ", condition, " is not satisfied."];
            Print["Content of the file ", file, ": ", Short[text, 20]];
            DeleteFile[file];
          ],
          Print["Error: urlsave[", {url, file, condition}, "]: output file was not created."];
        ];
      ];
    ];
    {resp, text}];
(* Saving a web page to a file with javascripts.
   urlsavejs[url, namejs, file]:
   - reads the script <dirwork>/<namejs>, replaces the text "URL" in it by url and
     writes the result to C:\temp\<namejs>;
   - runs "node <namejs>" inside C:\temp; the page is then looked for in
     C:\temp\page.htm (NOTE(review): presumably written there by the script - confirm);
   - copies that page to file (creating its directory when missing) and returns its
     size in bytes, or prints a warning and returns 0 when the page was not produced.
   The working directory is restored before returning. *)
urlsavejs[url_String, namejs_String, file_String] :=
  Module[{dirnode, dirfile, filejs, filejs1, file1, textjs, textjs1, nbytes},
    dirnode = "C:\\temp";
    dirfile = DirectoryName[file];
    filejs = FileNameJoin[{dirwork, namejs}];
    checkfile[filejs];
    filejs1 = FileNameJoin[{dirnode, namejs}];
    file1 = FileNameJoin[{dirnode, "page.htm"}];
    textjs = Import[filejs, "Text"];
    checkdir[dirnode];
    SetDirectory[dirnode];
    textjs1 = StringReplace[textjs, "URL" -> url];
    Export[filejs1, textjs1, "Text"];
    (* Remove a stale page so that its presence afterwards proves this run wrote it. *)
    If[FileExistsQ[file1], DeleteFile[file1]];
    Run["node " <> namejs];
    nbytes =
      If[FileExistsQ[file1],
        If[FileType[dirfile] =!= Directory, CreateDirectory[dirfile]];
        If[FileExistsQ[file], DeleteFile[file]];
        CopyFile[file1, file];
        FileByteCount[file1],
        Print["Warning: urlsavejs(", url, "): file ", file1,
          " does not exist."];
        0];
    (* Undo SetDirectory only here, after the copy, so that a relative file is still
       resolved exactly as before; previously the directory stack grew on every call. *)
    ResetDirectory[];
    nbytes
  ];
(* Updating data by replacing obsolete names: the first column of data is rewritten
   with the global replacement rules replnames; the other columns are unchanged. *)
updatenames[data_List] := Module[{columns, renamed},
  columns = Transpose[data];
  renamed = columns[[1]] /. replnames;
  Transpose[ReplacePart[columns, 1 -> renamed]]
];
(* HTML names without spaces: spaces become "_", parentheses become "-", the result is
   lower-cased and finally passed through transliterate. *)
htmlname[s_String] := Module[{joined},
  joined = StringReplace[s, {" " -> "_", "(" -> "-", ")" -> "-"}];
  transliterate[ToLowerCase[joined]]
];
(* Improve top line: a ">" with whitespace on both sides is replaced by the character
   \:203a with one space on each side. *)
toplinefix[s_String] := Module[{separator},
  separator = Whitespace ~~ ">" ~~ Whitespace;
  StringReplace[s, separator -> " \:203a "]
];
(* Improve classification line: a ">" with whitespace on both sides becomes ": ", and
   the words kingdom, phylum, class, order, family and species are replaced by the
   current values of the global symbols kingdom, phylum, class, order, family and
   namehtml. *)
classlinefix[s_String] := Module[{rules},
  rules = {
    Whitespace ~~ ">" ~~ Whitespace -> ": ",
    "kingdom" -> kingdom,
    "phylum" -> phylum,
    "class" -> class,
    "order" -> order,
    "family" -> family,
    "species" -> "" <> namehtml <> ""
  };
  StringReplace[s, rules]
];
(* NOTE(review): the text from the next comment up to the first closing bracket after the rukeywords assignment looks truncated: the comment below is never closed and the head of the definition that assigns synlineru, comlineen, enkeywords etc. is not present. Restore this part from the original source before relying on it. *)
(* Reads all page fragments marked by
Synonym: " <> synline <> ".
", "Synonyms: " <> synline <> ".
" ]]; synlineru = If[msynonyms == 0, "", If[msynonyms == 1, "\:0421\:0438\:043d\:043e\:043d\:0438\:043c: " <> synline <> ".
", "\:0421\:0438\:043d\:043e\:043d\:0438\:043c\:044b: " <> synline <> ".
" ]]; (* Common names *) cnamesen = ennames; mcnamesen = Length[cnamesen]; comlineen = If[mcnamesen == 0, "", If[mcnamesen == 1, cnamesen[[1]], StringJoin[Riffle[cnamesen, ", "]] ]]; comlineen1 = If[mcnamesen < 2, "", StringJoin[Riffle[Drop[cnamesen, 1], ", "]] ]; cnamesru = runames; mcnamesru = Length[cnamesru]; comlineru = If[mcnamesru == 0, "", If[mcnamesru == 1, cnamesru[[1]], StringJoin[Riffle[cnamesru, ", "]] ]]; locnames = transliterate[comlineru]; comlineru1 = If[mcnamesru < 2, "", StringJoin[Riffle[Drop[cnamesru, 1], ", "]] ]; locline = If[mcnamesru == 0, "", If[mcnamesru == 1, "Russian name: " <> locnames <> ".
", "Russian names: " <> locnames <> ".
" ]]; commlineen = If[mcnamesen == 0, "", If[mcnamesen == 1, "Common name: " <> comlineen <> ".
", "Common names: " <> comlineen <> ".
" ]]; commlineru = If[mcnamesru == 0, "", If[mcnamesru == 1, "\:0420\:0443\:0441\:0441\:043a\:043e\:0435 \:043d\:0430\:0437\:0432\ \:0430\:043d\:0438\:0435: " <> comlineru <> ".
", "\:0420\:0443\:0441\:0441\:043a\:0438\:0435 \:043d\:0430\:0437\:0432\ \:0430\:043d\:0438\:044f: " <> comlineru <> ".
" ]]; (* Keywords *) allnamesen = Join[{nameid}, If[Length[ennames] < 8, ennames, Take[ennames, 7]], If[Length[synonyms] < 4, synonyms, Take[synonyms, 3]]]; enkeywords = StringJoin[Riffle[allnamesen, ", "]]; allnamesru = Join[{nameid}, If[Length[runames] < 8, runames, Take[runames, 7]], If[Length[synonyms] < 4, synonyms, Take[synonyms, 3]]]; rukeywords = StringJoin[Riffle[allnamesru, ", "]]; ]; (* dateS[date, s]: DateString of date built from the elements Year, Month and Day with the string s between them *) dateS[date_List, s_String] := DateString[date, {"Year", s, "Month", s, "Day"}]; (* picCaption[r, p]: stringtrim applied to ncaption[caption[r, p], "; "]; the result is stored as a definition for this r and p, so it is computed once. caption and ncaption are not defined in this part of the file *) picCaption[r_String, p_String] := (picCaption[r, p] = stringtrim[ncaption[caption[r, p], "; "]]); (* picCaptionW[r, p]: picCaption[r, p] with every non-letter character replaced by a space, except that a hyphen standing directly between two letters is kept (it is swapped for the letter-only placeholder dash while the replacement runs); the result is passed through stringtrim *) picCaptionW[r_String, p_String] := Module[{cap, dash, x, y}, cap = picCaption[r, p]; dash = "DDDDDDDDDD"; cap = StringReplace[cap, x : LetterCharacter ~~ "-" ~~ y : LetterCharacter :> x <> dash <> y]; cap = StringReplace[cap, Except[LetterCharacter] -> " "]; cap = StringReplace[cap, dash -> "-"]; stringtrim[cap] ]; (* Saving list of common names to a file *) (* cdataSave[cdata, source]: when cdata is non-empty, appends source to cnameSources, makes sure dirCnames exists (checkdir1) and exports one line per {name, data} element of cdata to the file source <> ".dat" in that directory. data is split at commas, each piece is passed through stringtrim and the pieces are joined again with ", "; name <> ": " is padded with spaces up to sl = 30 characters. NOTE(review): the spelling Souce in both Print messages is presumably a typo for Source *) cnameSources = {}; dirCnames = FileNameJoin[{dirwork, "cnames"}]; cdataSave[cdata_List, source_String] := Module[{sl, ndata, dirsave, filesave, lines, name, data, sln}, sl = 30; ndata = Length[cdata]; If[ndata > 0, cnameSources = Append[cnameSources, source]; dirsave = dirCnames; checkdir1[dirsave]; filesave = FileNameJoin[{dirsave, source <> ".dat"}]; lines = Table[ {name, data} = cdat; data = stringtrim /@ StringSplit[data, ","]; data = StringRiffle[data, ", "]; name = name <> ": "; sln = StringLength[name]; If[sln < sl, name = StringJoin[name, Table[" ", {sl - sln}]]]; name <> data, {cdat, cdata}]; Export[filesave, lines, "Lines"]; Print["cdataSave: wrote ", ndata, " lines. Souce: ", source], Print["cdataSave: nothing to add. Souce: ", source]; ]; ]; (* Russian names of months *) (* rusmonths is the whitespace-separated content of the file rusmonths.txt in dirwork; the kernel exits unless it has exactly 12 entries *) filemonths = ToFileName[dirwork, "rusmonths.txt"]; checkfile[filemonths]; rusmonths = StringSplit[Import[filemonths, "Text"]]; If[Length[rusmonths] != 12, Print["Error: rusmonths: ", rusmonths, ". The length should be 12. 
Exiting ..."]; Exit[]; ]; (* openOnClick[file, text]: frameless Button labelled text (blue, Arial, size 12) whose action runs the command start notepad on file *) openOnClick[file_String, text_String] := Button[ Style[text, Blue, FontFamily -> "Arial", 12], Run["start notepad \"" <> file <> "\""], Appearance -> "Frameless" ];