(*************************************************** Ant Movie Catalog importation script www.antp.be/software/moviecatalog/ [Infos] Authors=Antoine Potten, KaraGarga, baffab, Thermal Ions, bad4u, Sancho Title=IMDB Description=Import data & picture from IMDB (optional image from Amazon) Site=us.imdb.com Language=EN Version=3.62 Requires=3.5.0 Comments=Based on the script made for version 3.3/3.4 by Antoine Potten, Danny Falkov, Kai Blankenhorn, lboregard, Ork, Trekkie, Youri Heijnen License=This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. GetInfo=1 [Options] ActorsLayout=2|0|0=Only actor names, separated by commas|1=Only actor names, separated by linebreaks|2=Actors names with character names between parenthesis separated by commas|3=Actors names with character names between parenthesis separated by linebreaks|4=Actor names like on IMDB page, with "...." 
and separated by linebreaks AllActors=1|0|0=Only import actors from main page (does not work if you import producer name)|1=Import all possible actors|2=Import 10 first actors (does not work with ActorLayout=4) AspectRatio=1|1|0=Do not import picture aspect ratio|1=Import picture aspect ratio to video format field|2=Import picture aspect ratio to resolution field Awards=2|0|0=Do not import awards|1=Import awards to Description field, after the summary|2=Import awards to Comments field, after comments BatchMode=0|0|0=Normal working mode, prompts user when needed|1=Does not display any window, takes the first movie found|2=Same as 1, but it uses the URL field if available to update movie information Classification=1|0|0=Do not import classification/certification|1=Import classification to MediaType field|2=Append classification to Comments field CommentType=1|0|0=Standard Type (Only one comment from main page)|1=Detailed Type (10 most useful comments from comments page)|2=No user comment, clear current field contents|3=No user comment, keep current field contents (may cause problem for multiple imports on the same movie if other options append text to the comment field) DescriptionSelection=1|0|0=Take the short summary, from main page (faster)|1=Show a list of available summaries|2=Take the longest summary GetTagline=1|1|0=Do not get tagline|1=Put it in Description field, before the summary|2=Put it in the Comment field, before the comments GoogleSearch=0|0|0=Use IMDB's website search function for results list|1=Use Google to search IMDB's website for a different results list (maybe useful for batch mode or as a workaround when script cannot read IMDB's results page)|2=Use Google to search IMDB's website for exact matches on film titles (sets movie name into quotation marks - more precise, but maybe some titles can't be found) HideAkaTitles=0|0|0=Show 'aka' titles on result list (= other titles for the film)|1=Hide 'aka' titles on result list ImageKind=3|3|0=No 
image|1=IMDB small image, from the main movie page, no image if none available|2=IMDB small image, from the main movie page, "No Poster Available" if none available|3=IMDB large image, from link on main movie page, else small image, else "No Poster Available"|4=Cover from "Merchandising Link" page, else "DVD detail" page, else small image|5=Cover from "DVD detail" page, else "Merchandising Link", else small image MPAA=2|0|0=Do not import MPAA rating|1=Import MPAA rating to MediaType|2=Append MPAA rating and info to Comments MultipleValuesCountry=1|1|0=Only take first value for Country|1=Take full list, separated by commas|2=Take full list, separated by slashes|3=Do not import Country MultipleValuesCategory=1|1|0=Only take first value for Category|1=Take full list, separated by commas|2=Take full list, separated by slashes|3=Do not import Category MultipleValuesLanguages=1|1|0=Only take first value for Languages|1=Take full list, separated by commas|2=Take full list, separated by slashes|3=Do not import Languages MultipleValuesAudioFormat=1|1|0=Only take first value for Audio Format|1=Take full list, separated by commas|2=Take full list, separated by slashes|3=Do not import Audio Format / Sound Mix PopularSearches=1|1|0=Do not use the popular searches page, directly show full search results|1=Show popular searches first, I'll click on "Find more" if needed (much faster) Producer=0|0|0=Do not get producer, put writer in the producer field instead|1=Get producer Trivia=1|0|0=Do not import trivia|1=Import short trivia to Description field, after the summary|2=Import short trivia to Comments field, after the comments|3=Import full trivia to Description field, after the summary|4=Import full trivia to Comments field, after the comments UserRatings=0|0|0=Import value to ratings field only (default)|1=Import value and number of votes to Media Type field|2=Import value and number of votes to comments field ConvertToASCII=0|0|0=Do not change special characters and 
accents.|1=Replace special characters and accents by basic ASCII characters EpisodeTitleSearch=0|0|0=Use "Find more" button on results list for next result pages if available (default)|1=Use "Find more" button on results list for episode title search instead ***************************************************)

program IMDB;

uses
  StringUtils1;

// ***** Manually set UserCountry to your required Classification Country below *****

const
  //UserCountry = '';
  { Delete the line above and remove the "//" in front of one of the following lines, or add your country if it is not listed }
  UserCountry = 'USA';
  //UserCountry = 'Canada';
  //UserCountry = 'Mexico';
  //UserCountry = 'Brazil';
  //UserCountry = 'Argentina';
  //UserCountry = 'Australia';
  //UserCountry = 'India';
  //UserCountry = 'Italy';
  //UserCountry = 'Spain';
  //UserCountry = 'Portugal';
  //UserCountry = 'France';
  //UserCountry = 'Germany';
  //UserCountry = 'Netherlands';
  //UserCountry = 'UK';
  //UserCountry = 'Ireland';
  //UserCountry = 'Finland';
  //UserCountry = 'Norway';
  //UserCountry = 'Sweden';
  //UserCountry = 'Switzerland';

  // Base URLs of the IMDB search pages; the search text is appended to them.
  PopularTitleSearchURL = 'http://www.imdb.com/find?tt=1;q=';
  FullTitleSearchURL = 'http://www.imdb.com/find?more=tt;q=';
  EpisodeTitleSearchURL = 'http://www.imdb.com/find?s=tt;ttype=ep;q=';

var
  MovieName: string;
  MovieURL: string;
  MovieNumber: string;
  UpdateFile: TStringList;

// ***** returns AText converted by Cp1252ToASCII when the "ConvertToASCII" option is 1 *****
// If the option is set but StringUtils1_Version is 5 or lower, the text is
// returned unchanged and a message asks the user to update StringUtils1.pas.
// With the option off, AText is returned unchanged.
function ConvertToASCII(AText: string): string;
begin
  Result := AText;
  if GetOption('ConvertToASCII') = 1 then
  begin
    if StringUtils1_Version > 5 then
      Result := Cp1252ToASCII(AText)
    else
      ShowMessage('The "ConvertToASCII" option requires a newer version of StringUtils1.pas (at least version 6).' + #13#10 + 'Run the "Update Scripts" script to get it.');
  end;
end;

// ***** analyzes IMDB's results page that asks to select a movie from a list *****
// Downloads Address. A page without the 'IMDb' marker is handed to
// AnalyzeMoviePage; otherwise it is treated as a list of search results.
// In normal mode (BatchMode = 0) the results are put in the pick tree and the
// procedure calls itself with the address left in Address by PickTreeExec.
// In batch mode the first result link found on the page is followed.
procedure AnalyzeResultsPage(Address: string);
var
  PageText: string;
  Value: string;
begin
  PageText := ConvertToASCII(GetPage(Address));
  // NOTE(review): the marker searched here is just 'IMDb', which looks likely
  // to occur on movie pages as well; it may have lost a surrounding HTML tag.
  // Confirm against the upstream script before relying on this branch.
  if pos('IMDb', PageText) = 0 then
  begin
    AnalyzeMoviePage(PageText)
  end
  else
  begin
    if Pos('<b>No Matches.</b>', PageText) > 0 then
    begin
      // Only tell the user in interactive mode; batch mode stays silent.
      if GetOption('BatchMode') = 0 then
        ShowMessage('No movie found for this search.');
      Exit;
    end;
    if GetOption('BatchMode') = 0 then
    begin
      PickTreeClear;
      // Each pass adds one section heading (when present) and the titles of
      // the table that follows it. The loop stops as soon as AddMovieTitles
      // adds nothing for the current table.
      repeat
        Value := TextBefore(PageText, '</b> (Displaying', '<p><b>');
        if Value <> '' then
        begin
          HTMLRemoveTags(Value);
          HTMLDecode(Value);
          PickTreeAdd(Value, '');
        end;
        Value := TextBetween(PageText, '<table><tr>', '</table>');
        // RemainingText is read right after TextBetween; presumably it holds
        // the text following the match (defined outside this file - verify).
        PageText := RemainingText;
      until not AddMovieTitles(Value);
      Value := TextBefore(PageText, '"><b>more titles</b></a>', '<a href="');
      if Value <> '' then
        PickTreeMoreLink('http://us.imdb.com' + Value);
      // When both links apply, PickTreeMoreLink is called twice in a row.
      if GetOption('EpisodeTitleSearch') > 0 then
        PickTreeMoreLink(EpisodeTitleSearchURL + UrlEncode(MovieName));
      if PickTreeExec(Address) then
        AnalyzeResultsPage(Address);
    end
    else
    begin
      // Batch mode: take the first result cell and follow its link.
      Value := TextBetween(PageText, '.</td><td valign="top">', '</a>');
      if Value <> '' then
        AnalyzeResultsPage('http://us.imdb.com' + TextBetween(Value, '<a href="', '" onclick="'));
    end;
  end;
end;

// ***** analyzes Google's results page that asks to select a movie from a list *****
// Downloads GoogleAddress and extracts the result links ending in
// 'maindetails"'. In normal mode (BatchMode = 0) they are offered in the pick
// tree; in batch mode the first one is used directly. When the AllActors or
// Producer option is 1, 'combined' is appended to each extracted address.
procedure AnalyzeGooglesResultsPage(GoogleAddress: string);
var
  PageText: string;
  Value: string;
  Address: string;
begin
  PageText := GetPage(GoogleAddress);
  Address := '';
  if Pos('did not match any documents', PageText) > 0 then
  begin
    // Same rule as AnalyzeResultsPage: no message window in batch mode.
    if GetOption('BatchMode') = 0 then
      ShowMessage('No movie found for this search');
    Exit;
  end;
  if GetOption('BatchMode') = 0 then
  begin
    PickTreeClear;
    PickTreeAdd('Google`s search results for "' + MovieName + '" on IMDB:', '');
    repeat
      Value := TextBetween(PageText, '<h3 class=r>', '</a>');
      PageText := RemainingText;
      Address := TextBetween(Value, '<a href="', 'maindetails"');
      if (GetOption('AllActors') = 1) or (GetOption('Producer') = 1) then
        if Address <> '' then
          Address := Address + 'combined';
      HTMLRemoveTags(Value);
      HTMLDecode(Value);
      // Entries whose text contains ') - ' are left out of the list, as are
      // entries without a title or without a usable address.
      if (Pos(') - ', Value) = 0) and (Value <> '') and (Address <> '') then
        PickTreeAdd(Value, Address);
    until Value = '';
    if PickTreeExec(GoogleAddress) then
      AnalyzeResultsPage(GoogleAddress);
  end
  else
  begin
    Value := TextBetween(PageText, '<h3 class=r>', '</a>');
    Address := TextBetween(Value, '<a href="', 'maindetails"');
    if (GetOption('AllActors') = 1) or (GetOption('Producer') = 1) then
      if Address <> '' then
        Address := Address + 'combined';
    if Address <> '' then
      AnalyzeResultsPage(Address);
  end;
end;

// ***** adds the movie titles found on IMDB's results page *****
// List is the HTML of one results table. Every result cell found in it is
// added to the pick tree together with its title URL.
// Returns True when at least one title was added, False otherwise.
function AddMovieTitles(List: string): Boolean;
var
  Value: string;
  Address: string;
begin
  Result := False;
  Value := TextBetween(List, '.</td><td valign="top">', '</td>');
  // HideAkaTitles = 1 removes whatever follows '<br> ' in the cell;
  // otherwise each 'aka' gets a ' | ' separator in front of it.
  if GetOption('HideAkaTitles') = 1 then
    Value := StringReplace(Value, TextAfter(Value, '<br> '), '')
  else
    Value := StringReplace(Value, 'aka', ' | aka');
  List := RemainingText;
  while Value <> '' do
  begin
    Address := TextBetween(Value, '<a href="/title/tt', '/');
    // The "combined" page is requested when all actors or the producer are wanted.
    if (GetOption('AllActors') = 1) or (GetOption('Producer') = 1) then
      Address := Address + '/combined'
    else
      Address := Address + '/';
    HTMLRemoveTags(Value);
    HTMLDecode(Value);
//    if GetOption('HideAkaTitles') = 1 then
//      Value := Value + ')';
    PickTreeAdd(Value, 'http://us.imdb.com/title/tt' + Address);
    Result := True;
    // Fetch the next result cell, with the same aka handling as above.
    Value := TextBetween(List, '.</td><td valign="top">', '</td>');
    if GetOption('HideAkaTitles') = 1 then
      Value := StringReplace(Value, TextAfter(Value, '<br> '), '')
    else
      Value := StringReplace(Value, 'aka', ' | aka');
    List := RemainingText;
  end;
end;

// ***** analyzes the page containing movie information *****
procedure AnalyzeMoviePage(PageText: string);
var
  Value, Value2, Value3, FullValue: string;
  p, Count: Integer;
begin
  MovieNumber := TextBetween(PageText,
'<input type="hidden" name="auto" value="legacy/title/tt', '/"><'); if MovieNumber = '' then MovieNumber := TextBetween(PageText, '<input type="hidden" name="auto" value="legacy/title/tt', '/combined"><'); if ((GetOption('AllActors') = 1) or (GetOption('Producer') = 1)) and (Pos('<div id="tn15" class="maindetails">', PageText) > 0) then PageText := ConvertToASCII(GetPage('http://us.imdb.com/title/tt' + MovieNumber + '/combined')); MovieURL := 'http://imdb.com/title/tt' + MovieNumber; // URL if CanSetField(fieldURL) then SetField(fieldURL, MovieURL); // OriginalTitle & Year if CanSetField(fieldOriginalTitle) or CanSetField(fieldYear) then begin Value := TextBetween(PageText, '<title>', ''); p := Pos(' (1', Value); if p = 0 then p := Pos(' (2', Value); Value2 := Copy(Value, 0, p-1); Value := Copy(Value, p+2, Length(Value)); HTMLDecode(Value2); if CanSetField(fieldOriginalTitle) then SetField(fieldOriginalTitle, Value2); if Pos('/', Value) > 0 then Value2 := TextBefore(Value, '/', '') else Value2 := TextBefore(Value, ')', ''); if CanSetField(fieldYear) then SetField(fieldYear, Value2); end; // Picture if CanSetPicture then begin case GetOption('ImageKind') of 2: if not ImportSmallPicture(PageText) then ImportPictureNotAvailable(PageText); 3: if not ImportLargePicture(PageText) then if not ImportSmallPicture(PageText) then ImportPictureNotAvailable(PageText); 4: if not ImportMerchandisingPicture then if not ImportDvdDetailsPicture then ImportSmallPicture(PageText); 5: if not ImportDvdDetailsPicture then if not ImportMerchandisingPicture then ImportSmallPicture(PageText); else ImportSmallPicture(PageText); end; end; // Director if CanSetField(fieldDirector) then begin Value := TextBetween(PageText, '
Director:', ''); if Value = '' then Value := TextBetween(PageText, '
Director', ''); if Pos(':<', Value) > 0 then Value := '<' + TextAfter(Value, ':<'); if Pos('

', Value) > 0 then Value := TextBetween(Value, '

', '

'); Value := StringReplace(Value, '
', ','); HTMLRemoveTags(Value); HTMLDecode(Value); Value := StringReplace(Value, ', more', ''); Value := StringReplace(Value, ', (more)', ''); Value := StringReplace(Value, ' ', ''); Value := StringReplace(Value, #13, ''); Value := StringReplace(Value, #10, ''); Value := StringReplace(Value, #9, ''); Value := StringReplace(Value, ',', ', '); if Copy(Value, Length(Value) - 1, 2) = ', ' then Value := Copy(Value, 0, Length(Value) - 2); SetField(fieldDirector, Value); end; // Actors if CanSetField(fieldActors) then begin Value := FullTrim(TextBetween(PageText, '', '
')); if Value <> '' then begin FullValue := ''; Count := 0; case GetOption('ActorsLayout') of 0, 1: while Pos(' 0 do begin Value2 := TextBetween(Value, ''); Value := RemainingText; if Pos('rest of cast', Value2) > 0 then Continue; if FullValue <> '' then FullValue := FullValue + #13#10; TextBefore(Value2, '', ''); Value2 := FullTrim(TextBetween(Value2, '', '')); HTMLRemoveTags(Value2); if Value2 <> '' then begin FullValue := FullValue + Value2; Count := Count + 1; end; if (Count = 10) and (GetOption('AllActors') = 2) then Break; end; 2, 3: while Pos(' 0 do begin Value2 := TextBetween(Value, ''); Value := RemainingText; if Pos('rest of cast', Value2) > 0 then Continue; if FullValue <> '' then FullValue := FullValue + #13#10; TextBefore(Value2, '', ''); Value2 := FullTrim(TextBetween(Value2, '', '')); HTMLRemoveTags(Value2); if Value2 <> '' then begin FullValue := FullValue + Value2; Value2 := FullTrim(TextBetween(RemainingText, '"char">', '')); if Value2 <> '' then FullValue := FullValue + ' (as ' + Value2 + ')'; Count := Count + 1; if (Count = 10) and (GetOption('AllActors') = 2) then Break; end; end; 4: begin FullValue := Value; FullValue := StringReplace(FullValue, ' rest of cast listed alphabetically:', ''); FullValue := StringReplace(FullValue, '> <', '><'); FullValue := StringReplace(FullValue, '', #13#10); end; end; HTMLRemoveTags(FullValue); HTMLDecode(FullValue); case GetOption('ActorsLayout') of 0, 2: FullValue := StringReplace(FullValue, #13#10, ', '); end; SetField(fieldActors, FullValue); end; end; //Country if CanSetField(fieldCountry) then begin SetField(fieldCountry, ImportList(PageText, GetOption('MultipleValuesCountry'), '/Countries/')); end; //Category if CanSetField(fieldCategory) then begin SetField(fieldCategory, ImportList(PageText, GetOption('MultipleValuesCategory'), '/Genres/')); end; // Language if CanSetField(fieldLanguages) then begin SetField(fieldLanguages, ImportList(PageText, GetOption('MultipleValuesLanguages'), '/Languages/')); 
end; // Audio Format if CanSetField(fieldAudioFormat) then begin SetField(fieldAudioFormat, ImportList(PageText, GetOption('MultipleValuesAudioFormat'), '/search/title?sound_mixes=')); end; // Aspect Ratio begin Value := ''; Value := TextBetween(PageText, '
Aspect Ratio:
', ''); if Pos('

', Value) > 0 then Value := TextBetween(Value, '

', '

'); if Pos(' 0 then Value := TextBefore(Value, 'Plot:
', ''); if Value = '' then Value := TextBetween(PageText, '
Plot Summary:
', ''); if Pos('

', Value) > 0 then Value := TextBetween(Value, '

', '

'); Value := StringReplace(Value, '
', ''); Value := StringReplace(Value, Textbetween(Value, '| ', ''), ''); Value := StringReplace(Value, Textbetween(Value, '', ''), ''); Value := StringReplace(Value, '| ', ''); Value := StringReplace(Value, '', ''); Value := StringReplace(Value, '»', ''); if (Value = #13#10 + #13#10) or (Value = #13#10) then Value := ''; if (GetOption('DescriptionSelection') = 0) and (Pos(' 0) then Value := TextAfter(Value, #13#10); if Value <> '' then SetField(fieldDescription, ImportSummary(Value)); end; // Length if CanSetField(fieldLength) then begin Value := TextBetween(PageText, '
Runtime:
', '
'); if Pos('

', Value) > 0 then Value := TextBetween(Value, '

', '

'); Value := TextBefore(Value, ' min', ''); HTMLRemoveTags(Value); Value := FullTrim(Value); if Value <> '' then begin if Pos(':', Value) > 0 then SetField(fieldLength, TextAfter(Value, ':')) else SetField(fieldLength, Value); end; end; // Writer (Producer Field) if CanSetField(fieldProducer) then begin if GetOption('Producer') = 1 then begin Value := TextBetween(PageText, 'Produced by
', ''); FullValue := ''; Value2 := TextBetween(Value, ''); if FullValue <> '' then FullValue := FullValue + ', '; FullValue := FullValue + Value2; Value2 := TextBetween(Value, ' 0) then begin Value2 := ''; FullValue := ConvertToASCII(GetPage(MovieURL+'/usercomments')); FullValue := TextAfter(FullValue, 'review useful:
'); while FullValue <> '' do begin Value := TextBetween(FullValue, '', '', ''); Value := RemainingText; Value2 := Value2 + #13#10 + 'Date: ' + TextBetween(Value, '', ''); Value := RemainingText; Value := 'Author: ' + TextBetween(Value, 'comments">', '
'); HtmlRemoveTags(Value); Value2 := Value2 + #13#10 + Value; Value := RemainingText; Value := TextBetween(Value, '

' + #13#10, #13#10 + '

'); Value := StringReplace(Value, #13#10, ' '); Value := StringReplace(Value, '

', #13#10); Value := StringReplace(Value, '
', #13#10); HtmlDecode(Value); Value2 := Value2 + #13#10 + #13#10 + FullTrim(Value); FullValue := TextAfter(FullValue, 'review useful:
'); end; HTMLRemoveTags(Value2); HTMLDecode(Value2); SetField(fieldComments, 'USER COMMENTS:' + Value2 + #13#10); end; end else if (GetOption('CommentType') = 0) then begin Value := TextAfter(PageText, '/comments">'); if Value <> '' then begin Value := TextBetween(Value, '

', '

'); Value := StringReplace(Value, #13#10, ' '); Value := StringReplace(Value, '
', #13#10); HTMLRemoveTags(Value); HTMLDecode(Value); Value := FullTrim(Value); while Pos(' ', Value) > 0 do Value := StringReplace(Value, ' ', ' '); while Pos(#13#10, Value) = 1 do Delete(Value, 1, 2); SetField(fieldComments, Value + #13#10); end; end else if (GetOption('CommentType') = 2) then SetField(fieldComments, ''); end; // TagLine if GetOption('GetTagline') > 0 then begin Value := TextBetween(PageText, '
Tagline:
', ''); if Pos('

', Value) > 0 then Value := TextBetween(Value, '

', '

'); if Pos(' 0 then Value := TextBefore(Value, ' '' then begin if StrGet(Value, 1) <> '"' then Value := '"' + Value + '"'; case GetOption('GetTagline') of 1: begin if GetField(fieldDescription) <> '' then Value := Value + #13#10 + #13#10 + GetField(fieldDescription); SetField(fieldDescription, Value); end; 2: begin if GetField(fieldComments) <> '' then Value := Value + #13#10 + #13#10 + GetField(fieldComments); SetField(fieldComments, Value); end; end; end; end; // Trivia if GetOption('Trivia') > 0 then begin sleep(50); Value := MovieUrl; FullValue := ConvertToASCII(GetPage(Value+'/trivia')); case GetOption('Trivia') of 1,2: Value := TextBetween(FullValue, '
', '
'); 3,4: Value := TextBetween(FullValue, 'class="soda">', #13#10 + #13#10 + #13#10 + '