(*************************************************** Ant Movie Catalog importation script www.antp.be/software/moviecatalog/ [Infos] Authors=Hubert Kosior, KaraGarga, Chetan Rao, Donna Huffman Title=All Movie Guide Description=All Movie Guide detailed info import with small picture Site=http://www.allmovie.com Language=EN Version=0.5.2 / March 2009 Requires=3.5.0 Comments=a bug corrected by Antoine Potten|completely rewritten for the new AllMovieGuide site by Chetan Rao.|Reworked March 2009 for changes to AllMovieGuide by Donna Huffman License=This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. | GetInfo=1 [Options] CategoryOptions=3|1|1=Only import first category|2=Import all categories and separate with "/"|3=Import all categories and separate with "," ProducerOptions=0|1|0=Do Not Import into Producer Field|1=Import Production Company in Producer Field SynopsisOptions=1|1|1=Import into Description Field|2=Import into Comments Field|0=DO NOT import Synopsis ReviewOptions=2|2|1=Import into Description Field|2=Import into Comments Field|0=DO NOT import Review AwardsOptions=2|2|1=Import into Description Field|2=Import into Comments Field|0=DO NOT import Awards List CastOptions=3|3|1=Import Cast divided by ";"|2=Import Cast as a list (AMG Default)|3=Import Cast as a list (like IMDB)|4=Import Cast as a list within paranthesis|5=Import Cast within paranthesis FieldforCredits=2|1|0=DO NOT import Production Credits|1=Import Production Credits into Description Field|2=Import Production Credits into Comments Field|3=Import Production Credits into Actors Field CreditsOptions=2|3|1=Import Credits divided by ";"|2=Import Credits as a list (AMG Default)|3=Import Credits as a list (like IMDB)|4=Import Credits as a list within paranthesis|5=Import Credits within paranthesis ***************************************************) program AllMovie; uses StringUtils1; var MovieName: string; PageURL: string; // simple string procedures function StringReplaceAll(S, Old, New: string): string; begin while Pos(Old, S) > 0 do S := StringReplace(S, Old, New); Result := S; end; function GetStringFromList(Content, Delimiter: string): string; begin Result := StringReplace(Content, '', Delimiter); HTMLRemoveTags(Result); HTMLDecode(Result); Result := UTF8Decode(Result); Result := StringReplace(Result, #9, ''); Result := StringReplace(Result, #10, ''); Result := StringReplace(Result, #13, ''); // remove trailing delimiter if (Copy(Result, Length(Result), 1) = Delimiter) then Result := Copy(Result, 0, Length(Result) - 1); Result := StringReplaceAll(Result, ' ', ' '); Result := StringReplaceAll(Result, ', ', ','); Result := StringReplace(Result, ',', ', '); end; function GetStringFromTable(Content, Delimiter, ColDelim : string): string; var Data : string; ColLen : Integer; begin ColLen := Length(ColDelim); Content := StringReplace(Content, #9, ''); Content := StringReplace(Content, #10, ''); Content := StringReplace(Content, #13, ''); Content := StringReplace(Content, '', ColDelim); Content := StringReplace(Content, '', Delimiter); Content := StringReplace(Content, '', ColDelim); Content := StringReplaceAll(Content, ' ', ''); HTMLRemoveTags(Content); HTMLDecode(Content); Content := UTF8Decode(Content); Result := ''; Result := Content; end; procedure CutAfter(var Str: string; Pattern: string); begin Str := Copy(str, Pos(Pattern, Str) + Length(Pattern), Length(Str)); end; procedure CutBefore(var Str: string; Pattern: string); begin Str := Copy(Str, Pos(Pattern, Str), Length(Str)); end; // Loads and analyses page from internet (list of movies or direct hit) procedure AnalyzePage(Address: string); var Page: TStringList; begin Page := TStringList.Create; Page.Text := GetPage(Address); // ShowMessage('AnalysePage ' + Address); // movie list Sleep(500); if Pos('Search Results for:', Page.Text) > 0 then begin PickTreeClear; PickTreeAdd('Search results', ''); AddMoviesTitles(Page); if PickTreeExec(Address) then // ShowMessage(Address); AnalyzePage(Address); // refine search end else if Pos('Sorry, there are too many possible matches, please adjust your search.', Page.Text) > 0 then begin ShowMessage('Sorry, there is too many possible matches, please adjust your search.'); if Input('All Movie Import', 'Enter the title of the movie:', MovieName) then AnalyzePage('http://www.allmovie.com/search/work/' + URLEncode(MovieName)); // direct hit end else begin if CanSetField(fieldURL) then SetField(FieldURL, Address); // showmessage (Address); AnalyzeMoviePage(Page) end; // cleanup Page.Free; end; // Extracts movie details from page procedure AnalyzeMoviePage(MoviePage: TStringList); var Page: string; Value: string; Content: string; Dummy: string; SubPage: TStringList; begin Page := MoviePage.Text; Page := UTF8Decode(Page); SubPage := TStringList.Create; // Original title if CanSetField(fieldOriginalTitle) then begin Value := TextBetween(Page, 'span class="title">', ''); SetField(fieldOriginalTitle, Value); end; // get the left panel content -- this yields year, runtime, country, director & genre Content := TextBetween(Page, '', '</html>'); // remove unwanted formatting code Content := StringReplace(Content, #9, ''); Content := StringReplaceAll(Content, ' ', ''); // Year if CanSetField(fieldYear) then begin Value := TextAfter(Content, '<span>Year</span>'); Value := GetStringFromHTML(Value, 'allmovie.com/explore/year/', '">', '</a>'); SetField(fieldYear, Value); end; // Length if CanSetField(fieldLength) then begin Value := TextAfter(Content, '<span>Run Time</span>'); Value := TextAfter(Value, 'width: 86px;">'); // length is second field in the table Value := GetStringFromHTML(Value, 'width: 86px;">', 'width: 86px;">', ' min'); SetField(fieldLength, Value); end; // Country if CanSetField(fieldCountry) then begin Value := TextAfter(Content, '<span>Countries</span>'); Value := GetStringFromHTML(Value, 'allmovie.com/explore/country/', '">', '</a>'); SetField(fieldCountry, Value); end; // AKA -> translated title if CanSetField(fieldTranslatedTitle) then begin Value := TextAfter(Content, '<span>AKA</span>'); Value := GetStringFromHTML(Value, 'class="formed-sub">', 'class="formed-sub">', '</td>'); SetField(fieldTranslatedTitle, Value); end; // Rating (multiplied by 2, because 0 <= AMG rating <= 5) if CanSetField(fieldRating) then begin Value := GetStringFromHTML(Page, '<span>Work Rating</span>', 'alt="', ' Stars'); if Length(Value) > 0 then begin SetField(fieldRating, FloatToStr(StrToFloat(Value)*2)); end; end; // Director if CanSetField(fieldDirector) then begin Value := TextAfter(Content, '<span>Director</span>'); Value := GetStringFromHTML(Value, 'allmovie.com/artist/', '">', '</a>'); SetField(fieldDirector, Value); end; // Genre -> category if CanSetField(fieldCategory) then begin Value := TextAfter(Content, '<span>Genres</span>'); Value := TextBetween(Value, '<ul>', '</ul>'); Value := StringReplace(Value, '[nf]', ''); HTMLDecode(Value); // showmessage (value); if GetOption('CategoryOptions') = 1 then begin Value := TextBetween(Value, '<li>', '</li>'); HTMLRemoveTags(Value); // showmessage (value); end; if GetOption('CategoryOptions') = 2 then Value := GetStringFromList(Value, '/'); // showmessage (value); if GetOption('CategoryOptions') = 3 then Value := GetStringFromList(Value, ','); SetField(fieldCategory, Value); end; // Producing company -> producer if CanSetField(fieldProducer) then begin if GetOption('ProducerOptions') = 1 then begin Value := TextAfter(Content, '<span>Produced by</span>'); Value := GetStringFromHTML(Value, '<a href="http://www.allmovie.com/', '">', '</a>'); HTMLRemoveTags(Value); HTMLDecode(Value); SetField(fieldProducer, Value); end; end; // Image if CanSetPicture then begin Value := GetStringFromHTML(Page, 'http://image.allmusic.com', '', '"'); if Length(Value) > 0 then GetPicture(Value); end; // Plot synopsis if CanSetField(fieldComments) or CanSetField(fieldDescription) then begin // store the author of the synopsis Value := GetStringFromHTML(Content, 'class="author">by ', '<td colspan="2">', '</table>'); HTMLRemoveTags(Dummy); HTMLDecode(Value); if GetOption('SynopsisOptions') = 1 then SetField(fieldDescription, Value); if GetOption('SynopsisOptions') = 2 then SetField(fieldComments, Value); end; // Cast -> actors if CanSetField(fieldActors) then begin // is a cast list available? if Pos('cast">Cast</a>', Page) > 0 then begin // first find the link Dummy := TextBetween(Page, '>Review<','Cast</a>'); Dummy := TextBetween(Dummy, 'href="','">'); // get the page SubPage.Text := GetPage(Dummy); // get the center panel content -- this yields the Cast table Value := TextBetween(SubPage.Text, '<div id="results-table">', '</table>'); // Clean up list Value := StringReplace(Value, #9, ''); Value := StringReplace(Value, #10, ''); Value := StringReplace(Value, #13, ''); Value := StringReplace(Value, ' ', ''); Value := StringReplace(Value, '<td width="305">- <em>', '||'); Value := StringReplace(Value, '</tr>', '~~'); Value := StringReplaceAll(Value, ' ~~', '~~'); HTMLRemoveTags(Value); HTMLDecode(Value); Value := UTF8Decode(Value); if Length(Value) > 0 then begin // remove double spaces if only actor name given while Pos(' ', Value) > 0 do Delete(Value, Pos(' ', Value), 2); if GetOption('CastOptions') = 1 then begin Value := StringReplace(Value, '~~', '; '); Value := StringReplace(Value, '||', ' - '); Value := StringReplace(Value, ' ;', ';'); SetField(fieldActors, Value); end; if GetOption('CastOptions') = 2 then begin Value := StringReplace(Value, '~~', #13#10); Value := StringReplaceAll(Value, '||', ' - '); if (Copy(Value, Length(Value) - 1, 2) = #13#10) then Value := Copy(Value, 0, Length(Value) - 2); SetField(fieldActors, Value); end; if GetOption('CastOptions') = 3 then begin Value := StringReplace(Value, '~~', #13#10); Value := StringReplace(Value, '||', ' ... '); if (Copy(Value, Length(Value) - 1, 2) = #13#10) then Value := Copy(Value, 0, Length(Value) - 2); SetField(fieldActors, Value); end; if GetOption('CastOptions') = 4 then begin Value := StringReplace(Value, '~~', ')'+#13#10); Value := StringReplace(Value, '||', ' ('); Value := StringReplaceAll(Value, ' )', ')'); if (Copy(Value, Length(Value) - 1, 2) = #13#10) then Value := Copy(Value, 0, Length(Value) - 2); SetField(fieldActors, Value); end; if GetOption('CastOptions') = 5 then begin Value := StringReplace(Value, '~~', '), '); Value := StringReplace(Value, '||', ' ('); Value := StringReplaceAll(Value, ' )', ')'); if (Copy(Value, Length(Value) - 1, 2) = ', ') then Value := Copy(Value, 0, Length(Value) - 2); SetField(fieldActors, Value); end; end; end; end; // Review -> description if CanSetField(fieldComments) or CanSetField(fieldDescription) then begin // is a review available? if Pos('review">Review</a>', Page) > 0 then begin // first find the link Dummy := TextBetween(Page, '>Overview</td>','Review</a>'); Dummy := TextBetween(Dummy, 'href="','">'); // get the page SubPage.Text := ''; SubPage.Text := GetPage(Dummy); // get the center panel content -- this yields the review Content := ''; Content := TextBetween(SubPage.Text, '<td align="right" class="author">', '</table>'); // store the author of the synopsis Value := ''; Value := TextBetween(Content, '<td colspan="2"><p>', '</td>'); HTMLRemoveTags(Value); HTMLDecode(Value); Value := UTF8Decode(Value); if GetOption('ReviewOptions') = 1 then SetField(fieldDescription, Value); if GetOption('ReviewOptions') = 2 then SetField(fieldComments, Value); end; end; // Awards -> description if CanSetField(fieldComments) or CanSetField(fieldDescription) then begin // is an awards page available? if Pos('awards">Awards</a>', Page) > 0 then begin // first find the link Dummy := TextBetween(Page, '>Production Credits</a>','Awards</a>'); Dummy := TextBetween(Dummy, 'href="','">'); // get the page SubPage.Text := ''; SubPage.Text := GetPage(Dummy); // get the center panel content -- this yields the awards Content := TextBetween(SubPage.Text, '<table class="awards" cellpadding="0" cellspacing="0" width="100%">', '</table>'); Value := GetStringFromTable(Content, '~~', '||'); Value := StringReplace(Value, '||~~', '~~'); Value := StringReplace(Value, '||||', '||'); Value := StringReplace(Value, '~~||', '~~'); Value := StringReplace(Value, '~~', #13#10); Value := StringReplace(Value, '||', ' - '); if Length(Value) > 0 then begin if GetOption('AwardsOptions') = 1 then SetField(fieldDescription, GetField(fieldDescription) + #13#10 + #13#10 + 'AWARDS:' +#13#10 + #13#10 + Value + #13#10); if GetOption('AwardsOptions') = 2 then SetField(fieldComments, GetField(fieldComments) + #13#10 + #13#10 + 'AWARDS:' + #13#10 + #13#10 + Value + #13#10); end; end; end; // ProductionCredits -> Comments/Description if CanSetField(fieldComments) or CanSetField(fieldDescription) then begin // is a credits list available? if Pos('credits">Production Credits', Page) > 0 then begin // first find the link Dummy := TextBetween(Page, '>Cast</a>','Awards</a>'); Dummy := TextBetween(Dummy, 'href="','">'); // get the page SubPage.Text := GetPage(Dummy); // get the center content -- this yields the credits list Value := TextBetween(SubPage.Text, '<div id="results-table">', '</table>'); // Clean up list Value := StringReplace(Value, #9, ''); Value := StringReplace(Value, #10, ''); Value := StringReplace(Value, #13, ''); Value := StringReplace(Value, ' ', ''); Value := StringReplace(Value, '<td width="305">- <em>', '||'); Value := StringReplace(Value, '</tr>', '~~'); Value := StringReplaceAll(Value, ' ~~', '~~'); HTMLRemoveTags(Value); HTMLDecode(Value); Value := UTF8Decode(Value); if Length(Value) > 0 then begin // remove double spaces while Pos(' ', Value) > 0 do Delete(Value, Pos(' ', Value), 2); if GetOption('CreditsOptions') = 1 then begin Value := StringReplace(Value, '~~', '; '); Value := StringReplace(Value, '||', ' - '); Value := StringReplaceAll(Value, ' ;', ';'); end; if GetOption('CreditsOptions') = 2 then begin Value := StringReplace(Value, '~~', #13#10); Value := StringReplace(Value, '||', ' - '); if (Copy(Value, Length(Value) - 1, 2) = #13#10) then Value := Copy(Value, 0, Length(Value) - 2); end; if GetOption('CreditsOptions') = 3 then begin Value := StringReplace(Value, '~~', #13#10); Value := StringReplace(Value, '||', ' ... '); if (Copy(Value, Length(Value) - 1, 2) = #13#10) then Value := Copy(Value, 0, Length(Value) - 2); end; if GetOption('CreditsOptions') = 4 then begin Value := StringReplace(Value, '~~', ')'+#13#10); Value := StringReplace(Value, '||', ' ('); Value := StringReplaceAll(Value, ' )', ')'); if (Copy(Value, Length(Value) - 1, 2) = #13#10) then Value := Copy(Value, 0, Length(Value) - 2); end; if GetOption('CreditsOptions') = 5 then begin Value := StringReplace(Value, '~~', '), '); Value := StringReplace(Value, '||', ' ('); Value := StringReplaceAll(Value, ' )', ')'); if (Copy(Value, Length(Value) - 1, 2) = ', ') then Value := Copy(Value, 0, Length(Value) - 2); end; if GetOption('FieldforCredits') = 1 then SetField(fieldDescription, GetField(fieldDescription) + 'PRODUCTION CREDITS:' +#13#10 + #13#10 + Value); if GetOption('FieldforCredits') = 2 then SetField(fieldComments, GetField(fieldComments) + 'PRODUCTION CREDITS:' + #13#10 + #13#10 + Value); if GetOption('FieldforCredits') = 3 then SetField(fieldActors, GetField(fieldActors) + 'PRODUCTION CREDITS:' + #13#10 + #13#10 + Value); end; end; // cleanup SubPage.Free; end; // remove trailing newline from description or comments Value := GetField(fieldDescription); if Copy(Value, Length(Value) - 1, 2) = #13#10 then begin Value := Copy(Value, 0, Length(Value) - 2); SetField(fieldDescription, Value); end; Value := GetField(fieldComments); if Copy(Value, Length(Value) - 1, 2) = #13#10 then begin Value := Copy(Value, 0, Length(Value) - 2); SetField(fieldComments, Value); end; end; // Adds movie titles from search results to tree procedure AddMoviesTitles(ResultsPage: TStringList); var Page: string; MovieTitle, MovieAddress: string; begin // ShowMessage('AddMoviesTitles'); Page := TextBetween(ResultsPage.Text, '<a>Category</a>', '<div id="footer">'); // Every movie entry begins with string '<a href="http://www.allmovie.com/work/' while Pos('<a href="http://www.allmovie.com/work/', Page) > 0 do begin CutBefore(Page, '<a href="http://www.allmovie.com/work/'); MovieAddress := GetStringFromHTML(Page, 'http://www.allmovie.com/work/', '', '">'); // Get Movie Title MovieTitle := GetStringFromHTML(Page, '">', '">', '</a>'); // Add year to movie title MovieTitle := MovieTitle + ' ('+GetStringFromHTML(Page, '<td class="cell" style="width: 70px;">', '">', '</td>')+')'; // Add producer to MovieTitle MovieTitle := MovieTitle + ' '+GetStringFromHTML(Page, '<td class="cell" style="width: 190px;">', '">', '</td>'); CutAfter(Page, '</a>'); // add movie to list PickTreeAdd(MovieTitle, MovieAddress); end; end; // Extracts single movie detail (like director, genre) from page function GetStringFromHTML(Page, StartTag, CutTag, EndTag: string): string; begin Result := ''; // recognition tag - if present, extract detail from page, otherwise assume detail is not present if Pos(StartTag, Page) > 0 then begin CutBefore(Page, StartTag); // optional cut tag helps finding right string in html page if Length(CutTag) > 0 then CutAfter(Page, CutTag); // movie detail copied with html tags up to end string Result := Copy(Page, 0, Pos(EndTag, Page) - 1); // remove html tags and decode html string HTMLRemoveTags(Result); HTMLDecode(Result); UTF8Decode(Result); end; end; procedure RemovePronoun(var Str: string); var i: Integer; s: string; c: char; begin // remove pronouns s := UpperCase(Copy(Str, 0, 4)); if (s = 'LES ') or (s = 'UNE ') or (s = 'THE ') then Str := Copy(Str, 5, Length(Str) - 4) else begin s := Copy(s, 0, 3); if (s = 'LE ') or (s = 'UN ') then Str := Copy(Str, 4, Length(Str) - 3) else begin s := Copy(s, 0, 2); if (s = 'L''') or (s = 'L ') or (s = 'A ') then Str := Copy(Str, 3, Length(Str) - 2) end; end; // remove non-letters, non-digits and non-spaces s := ''; for i := 1 to Length(Str) do begin c := StrGet(Str, i); if ((c<'a') or (c>'z')) and ((c<'A') or (c>'Z')) and ((c<'0') or (c>'9')) and (c<>' ') then else s := s + Copy(Str, i, 1); end; Str := s; end; begin if CheckVersion(3,5,0) then begin MovieName := GetField(fieldOriginalTitle); if MovieName = '' then MovieName := GetField(fieldTranslatedTitle); if Input('All Movie Import', 'Enter title (only letters, digits and spaces):', MovieName) then begin if Pos('allmovie.com', MovieName) > 0 then AnalyzePage(MovieName) else begin //RemovePronoun(MovieName); PageURL := 'http://www.allmovie.com/search/work/' + StringReplace(URLEncode(MovieName), '%20', '+') ; AnalyzePage(PageURL); end; end; end else ShowMessage('This script requires a newer version of Ant Movie Catalog (at least the version 3.5.0)'); end.