Here's how to obtain all image links from an HTML document. The GetImageLinks procedure fills a TStrings object with the value of the SRC property of the IMG HTML element.
Note: if the images have relative links, you will have to parse the document url and prepend that to the image src.
Tip submitted by "mrbaseball34" (an About Delphi Programming member).
~~~~~~~~~~~~~~~~~~~~~~~~~
uses ... mshtml, ActiveX, COMObj, IdHTTP, idURI;
procedure GetImageLinks(AURL: String; AList: TStrings) ;
var
IDoc : IHTMLDocument2;
strHTML : String;
v : Variant;
x : integer;
ovLinks : OleVariant;
DocURL : String;
URI : TidURI;
ImgURL : String;
idHTTP : TidHTTP;
begin
AList.Clear;
URI := TidURI.Create(AURL) ;
try
DocURL := 'http://' + URI.Host;
if URI.Path <> '/' then
DocURL := DocURL + URI.Path;
finally
URI.Free;
end;
Idoc:=CreateComObject(Class_HTMLDOcument) as IHTMLDocument2;
try
IDoc.designMode:='on';
while IDoc.readyState<>'complete' do
Application.ProcessMessages;
v:=VarArrayCreate([0,0],VarVariant) ;
idHTTP := TidHTTP.Create(nil) ;
try
strHTML := idHTTP.Get(AURL) ;
finally
idHTTP.Free;
end;
v[0]:= strHTML;
IDoc.write(PSafeArray(System.TVarData(v).VArray)) ;
IDoc.designMode:='off';
while IDoc.readyState<>'complete' do
Application.ProcessMessages;
ovLinks := IDoc.all.tags('IMG') ;
if ovLinks.Length > 0 then
begin
for x := 0 to ovLinks.Length-1 do
begin
ImgURL := ovLinks.Item(x).src;
// The stuff below will probably need a little tweaking
// Deteriming and turning realtive URLs into absolute URLs
// is not that difficult but this is all I could come up with
// in such a short notice.
if (ImgURL[1] = '/') then
begin
// more than likely a relative URL so
// append the DocURL
ImgURL := DocURL + ImgUrl;
end
else
begin
if (Copy(ImgURL, 1, 11) = 'about:blank') then
begin
ImgURL := DocURL + Copy(ImgUrl, 12, Length(ImgURL)) ;
end;
end;
AList.Add(ImgURL) ;
end;
end;
finally
IDoc := nil;
end;
end;
//Usage
procedure TForm1.Button1Click(Sender: TObject) ;
begin
GetImageLinks('http://delphi.about.com/library/weekly/aa120704a.htm', Memo1.Lines) ;
end;
~~~~~~~~~~~~~~~~~~~~~~~~~
Note: the code uses Indy components.
Delphi tips navigator:
» Adding About Delphi Programming site (link and the blog RSS feed) to the Delphi 2005 Welcome page
« Using TWebBrowser to preview (and print) Microsoft Word documents

